diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index e4e943e025..4bdeef3904 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:98f3afd11308259de6e828e37376d18867fd321aba07826e29e4f8d9cab56bad -# created: 2024-02-27T15:56:18.442440378Z + digest: sha256:a8a80fc6456e433df53fc2a0d72ca0345db0ddefb409f1b75b118dfd1babd952 +# created: 2024-03-15T16:25:47.905264637Z diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 80c73d991c..b98d68799a 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -15,6 +15,7 @@ branchProtectionRules: - 'unit (3.9)' - 'unit (3.10)' - 'unit (3.11)' + - 'unit (3.12)' - 'cover' - 'Kokoro presubmit' permissionRules: diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index f059b5548a..132369f3ed 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.9', '3.10', '3.11'] + python: ['3.9', '3.10', '3.11', '3.12'] steps: - name: Checkout uses: actions/checkout@v4 diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 8e39a2cc43..bdaf39fe22 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -80,4 +80,8 @@ RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ # Test pip RUN python3 -m pip +# Install build requirements +COPY requirements.txt /requirements.txt +RUN python3 -m pip install --require-hashes -r requirements.txt + CMD ["python3.8"] diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in new file mode 100644 index 0000000000..816817c672 --- /dev/null +++ b/.kokoro/docker/docs/requirements.in @@ -0,0 +1 @@ +nox diff --git 
a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt new file mode 100644 index 0000000000..0e5d70f20f --- /dev/null +++ b/.kokoro/docker/docs/requirements.txt @@ -0,0 +1,38 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --allow-unsafe --generate-hashes requirements.in +# +argcomplete==3.2.3 \ + --hash=sha256:bf7900329262e481be5a15f56f19736b376df6f82ed27576fa893652c5de6c23 \ + --hash=sha256:c12355e0494c76a2a7b73e3a59b09024ca0ba1e279fb9ed6c1b82d5b74b6a70c + # via nox +colorlog==6.8.2 \ + --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ + --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 + # via nox +distlib==0.3.8 \ + --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ + --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 + # via virtualenv +filelock==3.13.1 \ + --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ + --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c + # via virtualenv +nox==2024.3.2 \ + --hash=sha256:e53514173ac0b98dd47585096a55572fe504fecede58ced708979184d05440be \ + --hash=sha256:f521ae08a15adbf5e11f16cb34e8d0e6ea521e0b92868f684e91677deb974553 + # via -r requirements.in +packaging==24.0 \ + --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ + --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 + # via nox +platformdirs==4.2.0 \ + --hash=sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068 \ + --hash=sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768 + # via virtualenv +virtualenv==20.25.1 \ + --hash=sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a \ + --hash=sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197 + # via nox diff --git 
a/.kokoro/requirements.in b/.kokoro/requirements.in index ec867d9fd6..fff4d9ce0d 100644 --- a/.kokoro/requirements.in +++ b/.kokoro/requirements.in @@ -1,5 +1,5 @@ gcp-docuploader -gcp-releasetool>=1.10.5 # required for compatibility with cryptography>=39.x +gcp-releasetool>=2 # required for compatibility with cryptography>=42.x importlib-metadata typing-extensions twine @@ -8,3 +8,4 @@ setuptools nox>=2022.11.21 # required to remove dependency on py charset-normalizer<3 click<8.1.0 +cryptography>=42.0.5 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index bda8e38c4f..dd61f5f320 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -93,40 +93,41 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==42.0.4 \ - --hash=sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b \ - --hash=sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce \ - --hash=sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88 \ - --hash=sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7 \ - --hash=sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20 \ - --hash=sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9 \ - --hash=sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff \ - --hash=sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1 \ - --hash=sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764 \ - --hash=sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b \ - --hash=sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298 \ - --hash=sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1 \ - --hash=sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824 \ - --hash=sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257 \ - 
--hash=sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a \ - --hash=sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129 \ - --hash=sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb \ - --hash=sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929 \ - --hash=sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854 \ - --hash=sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52 \ - --hash=sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923 \ - --hash=sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885 \ - --hash=sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0 \ - --hash=sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd \ - --hash=sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2 \ - --hash=sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18 \ - --hash=sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b \ - --hash=sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992 \ - --hash=sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74 \ - --hash=sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660 \ - --hash=sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925 \ - --hash=sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449 +cryptography==42.0.5 \ + --hash=sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee \ + --hash=sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576 \ + --hash=sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d \ + --hash=sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30 \ + --hash=sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413 \ + 
--hash=sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb \ + --hash=sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da \ + --hash=sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4 \ + --hash=sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd \ + --hash=sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc \ + --hash=sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8 \ + --hash=sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1 \ + --hash=sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc \ + --hash=sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e \ + --hash=sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8 \ + --hash=sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940 \ + --hash=sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400 \ + --hash=sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7 \ + --hash=sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16 \ + --hash=sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278 \ + --hash=sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74 \ + --hash=sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec \ + --hash=sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1 \ + --hash=sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2 \ + --hash=sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c \ + --hash=sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922 \ + --hash=sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a \ + --hash=sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6 \ + --hash=sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1 \ + 
--hash=sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e \ + --hash=sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac \ + --hash=sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7 # via + # -r requirements.in # gcp-releasetool # secretstorage distlib==0.3.7 \ @@ -145,9 +146,9 @@ gcp-docuploader==0.6.5 \ --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==1.16.0 \ - --hash=sha256:27bf19d2e87aaa884096ff941aa3c592c482be3d6a2bfe6f06afafa6af2353e3 \ - --hash=sha256:a316b197a543fd036209d0caba7a8eb4d236d8e65381c80cbc6d7efaa7606d63 +gcp-releasetool==2.0.0 \ + --hash=sha256:3d73480b50ba243f22d7c7ec08b115a30e1c7817c4899781840c26f9c55b8277 \ + --hash=sha256:7aa9fd935ec61e581eb8458ad00823786d91756c25e492f372b2b30962f3c28f # via -r requirements.in google-api-core==2.12.0 \ --hash=sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553 \ @@ -392,29 +393,18 @@ platformdirs==3.11.0 \ --hash=sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3 \ --hash=sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e # via virtualenv -protobuf==3.20.3 \ - --hash=sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7 \ - --hash=sha256:28545383d61f55b57cf4df63eebd9827754fd2dc25f80c5253f9184235db242c \ - --hash=sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2 \ - --hash=sha256:398a9e0c3eaceb34ec1aee71894ca3299605fa8e761544934378bbc6c97de23b \ - --hash=sha256:44246bab5dd4b7fbd3c0c80b6f16686808fab0e4aca819ade6e8d294a29c7050 \ - --hash=sha256:447d43819997825d4e71bf5769d869b968ce96848b6479397e29fc24c4a5dfe9 \ - --hash=sha256:67a3598f0a2dcbc58d02dd1928544e7d88f764b47d4a286202913f0b2801c2e7 \ - --hash=sha256:74480f79a023f90dc6e18febbf7b8bac7508420f2006fabd512013c0c238f454 \ - 
--hash=sha256:819559cafa1a373b7096a482b504ae8a857c89593cf3a25af743ac9ecbd23480 \ - --hash=sha256:899dc660cd599d7352d6f10d83c95df430a38b410c1b66b407a6b29265d66469 \ - --hash=sha256:8c0c984a1b8fef4086329ff8dd19ac77576b384079247c770f29cc8ce3afa06c \ - --hash=sha256:9aae4406ea63d825636cc11ffb34ad3379335803216ee3a856787bcf5ccc751e \ - --hash=sha256:a7ca6d488aa8ff7f329d4c545b2dbad8ac31464f1d8b1c87ad1346717731e4db \ - --hash=sha256:b6cc7ba72a8850621bfec987cb72623e703b7fe2b9127a161ce61e61558ad905 \ - --hash=sha256:bf01b5720be110540be4286e791db73f84a2b721072a3711efff6c324cdf074b \ - --hash=sha256:c02ce36ec760252242a33967d51c289fd0e1c0e6e5cc9397e2279177716add86 \ - --hash=sha256:d9e4432ff660d67d775c66ac42a67cf2453c27cb4d738fc22cb53b5d84c135d4 \ - --hash=sha256:daa564862dd0d39c00f8086f88700fdbe8bc717e993a21e90711acfed02f2402 \ - --hash=sha256:de78575669dddf6099a8a0f46a27e82a1783c557ccc38ee620ed8cc96d3be7d7 \ - --hash=sha256:e64857f395505ebf3d2569935506ae0dfc4a15cb80dc25261176c784662cdcc4 \ - --hash=sha256:f4bd856d702e5b0d96a00ec6b307b0f51c1982c2bf9c0052cf9019e9a544ba99 \ - --hash=sha256:f4c42102bc82a51108e449cbb32b19b180022941c727bac0cfd50170341f16ee +protobuf==4.25.3 \ + --hash=sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4 \ + --hash=sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8 \ + --hash=sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c \ + --hash=sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d \ + --hash=sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4 \ + --hash=sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa \ + --hash=sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c \ + --hash=sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019 \ + --hash=sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9 \ + 
--hash=sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c \ + --hash=sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2 # via # gcp-docuploader # gcp-releasetool @@ -518,7 +508,7 @@ zipp==3.17.0 \ # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==68.2.2 \ - --hash=sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87 \ - --hash=sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a +setuptools==69.2.0 \ + --hash=sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e \ + --hash=sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c # via -r requirements.in diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bca26e361..72d0e833bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,68 @@ [1]: https://pypi.org/project/bigframes/#history +## [1.0.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.26.0...v1.0.0) (2024-03-25) + + +### ⚠ BREAKING CHANGES + +* rename model parameter `min_rel_progress` to `tol` +* `early_stop` setting no longer supported, always uses `True` +* rename model parameter `n_parallell_trees` to `n_estimators` +* rename `class_weights` to `class_weight` +* rename `learn_rate` to `learning_rate` +* PCA `n_components` supports float value and `None`, default to `None` +* rename various ml model parameters for consistency with sklearn (https://github.com/googleapis/python-bigquery-dataframes/pull/491) + +### Features + +* Add configuration option to read_gbq ([#401](https://github.com/googleapis/python-bigquery-dataframes/issues/401)) ([85cede2](https://github.com/googleapis/python-bigquery-dataframes/commit/85cede22587a9fe1dae888721492f9390dc46d70)) +* Add ml ARIMAPlus model params 
([#488](https://github.com/googleapis/python-bigquery-dataframes/issues/488)) ([352cb85](https://github.com/googleapis/python-bigquery-dataframes/commit/352cb850d23e41a2278edf0df584b89ee9619aab)) +* Add ml KMeans model params ([#477](https://github.com/googleapis/python-bigquery-dataframes/issues/477)) ([23a8d9a](https://github.com/googleapis/python-bigquery-dataframes/commit/23a8d9a32e1619aff92c8dfabb7bcdd54c314bd5)) +* Add ml LogisticRegression model params ([#481](https://github.com/googleapis/python-bigquery-dataframes/issues/481)) ([f959b65](https://github.com/googleapis/python-bigquery-dataframes/commit/f959b653a0e82b5bfd21f9e994031cf6d25c281a)) +* Add ml PCA model params ([#474](https://github.com/googleapis/python-bigquery-dataframes/issues/474)) ([fb5d83b](https://github.com/googleapis/python-bigquery-dataframes/commit/fb5d83b1e35c465cff486e6cf7862e5b32e3c65a)) +* Add params for LinearRegression model ([#464](https://github.com/googleapis/python-bigquery-dataframes/issues/464)) ([21b2188](https://github.com/googleapis/python-bigquery-dataframes/commit/21b2188cd0ca85485b5171ee9e46da4c924e2ff8)) +* Add support for Python 3.12 ([#231](https://github.com/googleapis/python-bigquery-dataframes/issues/231)) ([df2976f](https://github.com/googleapis/python-bigquery-dataframes/commit/df2976fa9fd0319b824128d0ccf2ebb20f381caa)) +* Allow assigning directly to Series.name property ([#495](https://github.com/googleapis/python-bigquery-dataframes/issues/495)) ([ad0e99e](https://github.com/googleapis/python-bigquery-dataframes/commit/ad0e99eddb1dddd3d439cea7db1e4f222b45c6b9)) +* Ensure `Series.str.len()` can get length of array columns 
([#497](https://github.com/googleapis/python-bigquery-dataframes/issues/497)) ([10c0446](https://github.com/googleapis/python-bigquery-dataframes/commit/10c044686228e5c6f3868c1eb10454f6a086ac8b)) +* Option to use bq connection without check ([#460](https://github.com/googleapis/python-bigquery-dataframes/issues/460)) ([0b3f8e5](https://github.com/googleapis/python-bigquery-dataframes/commit/0b3f8e5ce63f75ba99ee8cf29226a0fd38bef99f)) +* PCA `n_components` supports float value and `None`, default to `None` ([65c6f47](https://github.com/googleapis/python-bigquery-dataframes/commit/65c6f4736d1a5552835e4cec8b777b2c0f3dd8da)) +* Rename `class_weights` to `class_weight` ([65c6f47](https://github.com/googleapis/python-bigquery-dataframes/commit/65c6f4736d1a5552835e4cec8b777b2c0f3dd8da)) +* Rename `learn_rate` to `learning_rate` ([65c6f47](https://github.com/googleapis/python-bigquery-dataframes/commit/65c6f4736d1a5552835e4cec8b777b2c0f3dd8da)) +* Rename model parameter `min_rel_progress` to `tol` ([65c6f47](https://github.com/googleapis/python-bigquery-dataframes/commit/65c6f4736d1a5552835e4cec8b777b2c0f3dd8da)) +* Rename model parameter `n_parallell_trees` to `n_estimators` ([65c6f47](https://github.com/googleapis/python-bigquery-dataframes/commit/65c6f4736d1a5552835e4cec8b777b2c0f3dd8da)) +* Rename various ml model parameters for consistency with sklearn (https://github.com/googleapis/python-bigquery-dataframes/pull/491) ([65c6f47](https://github.com/googleapis/python-bigquery-dataframes/commit/65c6f4736d1a5552835e4cec8b777b2c0f3dd8da)) +* Support BQ regional endpoints for europe-west9, europe-west3, us-east4, and us-west1 ([#504](https://github.com/googleapis/python-bigquery-dataframes/issues/504)) 
([fbada4a](https://github.com/googleapis/python-bigquery-dataframes/commit/fbada4a70688c5d13fa35d1843b0c4252c5ced72)) +* Support dataframe.cov ([#498](https://github.com/googleapis/python-bigquery-dataframes/issues/498)) ([c4beafd](https://github.com/googleapis/python-bigquery-dataframes/commit/c4beafdf0c1ba88b306ca96fa3ca46b86debaa4c)) +* Support Series.dt.floor ([#493](https://github.com/googleapis/python-bigquery-dataframes/issues/493)) ([2dd01c2](https://github.com/googleapis/python-bigquery-dataframes/commit/2dd01c25e9f01c03979c61e71d3c5cd9f0bd4c96)) +* Support Series.dt.normalize ([#483](https://github.com/googleapis/python-bigquery-dataframes/issues/483)) ([0bf1e91](https://github.com/googleapis/python-bigquery-dataframes/commit/0bf1e916c2b636ec02ac010190e89d38e88fce4b)) +* Update plot sample to 1000 rows ([#458](https://github.com/googleapis/python-bigquery-dataframes/issues/458)) ([60d4a7b](https://github.com/googleapis/python-bigquery-dataframes/commit/60d4a7bbac867256f8bbfd3053c7dd2645c1b062)) + + +### Bug Fixes + +* `early_stop` setting no longer supported, always uses `True` ([65c6f47](https://github.com/googleapis/python-bigquery-dataframes/commit/65c6f4736d1a5552835e4cec8b777b2c0f3dd8da)) +* Fix -1 offset lookups failing ([#463](https://github.com/googleapis/python-bigquery-dataframes/issues/463)) ([2dfb9c2](https://github.com/googleapis/python-bigquery-dataframes/commit/2dfb9c24d07841d785e41b33573c5f3a218efeea)) +* Plot.scatter `c` argument functionalities ([#494](https://github.com/googleapis/python-bigquery-dataframes/issues/494)) 
([d6ee994](https://github.com/googleapis/python-bigquery-dataframes/commit/d6ee994c17e0b1dd6768b09ee81d2c902f601b76)) +* Properly support format param for numerical input. ([#486](https://github.com/googleapis/python-bigquery-dataframes/issues/486)) ([ae20c35](https://github.com/googleapis/python-bigquery-dataframes/commit/ae20c3583d5526777548b5d594ecca6034bb49ec)) +* Re-enable to_csv and to_json related tests ([#468](https://github.com/googleapis/python-bigquery-dataframes/issues/468)) ([2b9a01d](https://github.com/googleapis/python-bigquery-dataframes/commit/2b9a01de0adb8d41fbe73ce94b1acc8d22f507b5)) +* Sampling plot cannot preserve ordering if index is not ordered ([#475](https://github.com/googleapis/python-bigquery-dataframes/issues/475)) ([a5345fe](https://github.com/googleapis/python-bigquery-dataframes/commit/a5345fe8943667a89fcba48ce31aa8ecfc283f92)) +* Use actual BigQuery types rather than ibis types in to_pandas ([#500](https://github.com/googleapis/python-bigquery-dataframes/issues/500)) ([82b4f91](https://github.com/googleapis/python-bigquery-dataframes/commit/82b4f91db365fe06d8bd0bf938f880a48091104e)) + + +### Dependencies + +* Support pandas 2.2 ([#492](https://github.com/googleapis/python-bigquery-dataframes/issues/492)) ([e2cf50e](https://github.com/googleapis/python-bigquery-dataframes/commit/e2cf50e053f7163d1654c4b5621cc93e922d5148)) + + +### Documentation + +* Add code samples for metrics.{accuracy_score, confusion_matrix} ([#478](https://github.com/googleapis/python-bigquery-dataframes/issues/478)) ([3e3329a](https://github.com/googleapis/python-bigquery-dataframes/commit/3e3329a37c1020bd3e6d4d5e980103c63ab0c337)) +* Add code samples for metrics.{recall_score, 
precision_score, f1_score} ([#502](https://github.com/googleapis/python-bigquery-dataframes/issues/502)) ([370fe90](https://github.com/googleapis/python-bigquery-dataframes/commit/370fe9087848862d02f0e5a333fcb4cd37cf5ca0)) +* Improve API documentation ([#489](https://github.com/googleapis/python-bigquery-dataframes/issues/489)) ([751266e](https://github.com/googleapis/python-bigquery-dataframes/commit/751266e056ac566ef5b6e40fbbca84ed95e7a7a9)) +* Update bigquery connection documentation ([#499](https://github.com/googleapis/python-bigquery-dataframes/issues/499)) ([4bfe094](https://github.com/googleapis/python-bigquery-dataframes/commit/4bfe094fdf2f7e1af72cc939558713a499760129)) +* Update LLM + K-means notebook to handle partial failures ([#496](https://github.com/googleapis/python-bigquery-dataframes/issues/496)) ([97afad9](https://github.com/googleapis/python-bigquery-dataframes/commit/97afad96f80c1815db8ad34f0ff62095631036c2)) + ## [0.26.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.25.0...v0.26.0) (2024-03-20) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 5146b4bc7e..8d68e4fc27 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.9, 3.10 and 3.11 on both UNIX and Windows. + 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.11 -- -k + $ nox -s unit-3.12 -- -k .. 
note:: @@ -143,12 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.11 -- -k + $ nox -s system-3.12 -- -k .. note:: - System tests are only configured to run under Python 3.9 and 3.11. + System tests are only configured to run under Python 3.9, 3.11 and 3.12. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local @@ -261,10 +261,12 @@ We support: - `Python 3.9`_ - `Python 3.10`_ - `Python 3.11`_ +- `Python 3.12`_ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ +.. _Python 3.12: https://docs.python.org/3.12/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/README.rst b/README.rst index ad96382df8..73709641de 100644 --- a/README.rst +++ b/README.rst @@ -232,16 +232,18 @@ you must enable the following APIs: * The BigQuery Connection API (bigqueryconnection.googleapis.com) * The Vertex AI API (aiplatform.googleapis.com) -and you must be granted the following IAM roles: +and you must be granted the following IAM roles in the project: * BigQuery Data Editor (roles/bigquery.dataEditor) * BigQuery Connection Admin (roles/bigquery.connectionAdmin) -* Service Account User (roles/iam.serviceAccountUser) on the - `service account `__ - ``PROJECT_NUMBER-compute@developer.gserviceaccount.com`` +* Service Account User (roles/iam.serviceAccountUser) * Vertex AI User (roles/aiplatform.user) * Project IAM Admin (roles/resourcemanager.projectIamAdmin) if using default - BigQuery connection, or Browser (roles/browser) if using a pre-created connection + BigQuery connection, or Browser (roles/browser) if using a pre-configured connection. 
+ This requirement can be avoided by setting + ``bigframes.pandas.options.bigquery.skip_bq_connection_check`` option to ``True``, + in which case the connection (default or pre-configured) would be + used as-is without any existence or permission check. ML locations @@ -288,11 +290,17 @@ into `BigQuery remote functions `_ . Creating a remote function in BigQuery DataFrames (See `code samples `_) -creates a BigQuery remote function, a `BigQuery -connection -`_ , -and a `Cloud Functions (2nd gen) function -`_ . +creates: + +1. A `Cloud Functions (2nd gen) function `_. +2. A `BigQuery connection `_. + If the BigQuery connection is created, the BigQuery service will + create a + `Google Cloud-managed IAM service account `_ + and attach it to the connection. You can use a pre-configured BigQuery + connection if you prefer, in which case the connection creation is skipped. +3. A BigQuery remote function that talks to the cloud function (1) using the BigQuery + connection (2). BigQuery connections are created in the same location as the BigQuery DataFrames session, using the name you provide in the custom function @@ -331,17 +339,19 @@ To use BigQuery DataFrames remote functions, you must enable the following APIs: * The Cloud Resource Manager API (cloudresourcemanager.googleapis.com) To use BigQuery DataFrames remote functions, you must be granted the -following IAM roles: +following IAM roles in the project: * BigQuery Data Editor (roles/bigquery.dataEditor) * BigQuery Connection Admin (roles/bigquery.connectionAdmin) * Cloud Functions Developer (roles/cloudfunctions.developer) -* Service Account User (roles/iam.serviceAccountUser) on the - `service account `__ - ``PROJECT_NUMBER-compute@developer.gserviceaccount.com`` +* Service Account User (roles/iam.serviceAccountUser) * Storage Object Viewer (roles/storage.objectViewer) * Project IAM Admin (roles/resourcemanager.projectIamAdmin) if using default - BigQuery connection, or Browser (roles/browser) if using a 
pre-created connection + BigQuery connection, or Browser (roles/browser) if using a pre-configured connection. + This requirement can be avoided by setting + ``bigframes.pandas.options.bigquery.skip_bq_connection_check`` option to ``True``, + in which case the connection (default or pre-configured) would be + used as-is without any existence or permission check. **Limitations** diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 34701740f6..d035fe5df1 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -40,6 +40,7 @@ def __init__( use_regional_endpoints: bool = False, application_name: Optional[str] = None, kms_key_name: Optional[str] = None, + skip_bq_connection_check: bool = False, ): self._credentials = credentials self._project = project @@ -48,6 +49,7 @@ def __init__( self._use_regional_endpoints = use_regional_endpoints self._application_name = application_name self._kms_key_name = kms_key_name + self._skip_bq_connection_check = skip_bq_connection_check self._session_started = False @property @@ -105,14 +107,16 @@ def project(self, value: Optional[str]): @property def bq_connection(self) -> Optional[str]: - """Name of the BigQuery connection to use. Should be of the form ... + """Name of the BigQuery connection to use. Should be of the form + ... You should either have the connection already created in the location you have chosen, or you should have the Project IAM Admin role to enable the service to create the connection for you if you need it. - If this option isn't provided, or project or location aren't provided, session will use its default project/location/connection_id as default connection. + If this option isn't provided, or project or location aren't provided, + session will use its default project/location/connection_id as default connection. 
""" return self._bq_connection @@ -122,6 +126,26 @@ def bq_connection(self, value: Optional[str]): raise ValueError(SESSION_STARTED_MESSAGE.format(attribute="bq_connection")) self._bq_connection = value + @property + def skip_bq_connection_check(self) -> bool: + """Forcibly use the BigQuery connection. + + Setting this flag to True would avoid creating the BigQuery connection + and checking or setting IAM permissions on it. So if the BigQuery + connection (default or user-provided) does not exist, or it does not have + necessary permissions set up to support BigQuery DataFrames operations, + then a runtime error will be reported. + """ + return self._skip_bq_connection_check + + @skip_bq_connection_check.setter + def skip_bq_connection_check(self, value: bool): + if self._session_started and self._skip_bq_connection_check != value: + raise ValueError( + SESSION_STARTED_MESSAGE.format(attribute="skip_bq_connection_check") + ) + self._skip_bq_connection_check = value + @property def use_regional_endpoints(self) -> bool: """Flag to connect to regional API endpoints. diff --git a/bigframes/clients.py b/bigframes/clients.py index de2421e499..8a2dbfed6c 100644 --- a/bigframes/clients.py +++ b/bigframes/clients.py @@ -27,6 +27,23 @@ logger = logging.getLogger(__name__) +def resolve_full_bq_connection_name( + connection_name: str, default_project: str, default_location: str +) -> str: + """Retrieve the full connection name of the form ... 
+ Use default project, location or connection_id when any of them are missing.""" + if connection_name.count(".") == 2: + return connection_name + + if connection_name.count(".") == 1: + return f"{default_project}.{connection_name}" + + if connection_name.count(".") == 0: + return f"{default_project}.{default_location}.{connection_name}" + + raise ValueError(f"Invalid connection name format: {connection_name}.") + + class BqConnectionManager: """Manager to handle operations with BQ connections.""" @@ -41,23 +58,6 @@ def __init__( self._bq_connection_client = bq_connection_client self._cloud_resource_manager_client = cloud_resource_manager_client - @classmethod - def resolve_full_connection_name( - cls, connection_name: str, default_project: str, default_location: str - ) -> str: - """Retrieve the full connection name of the form ... - Use default project, location or connection_id when any of them are missing.""" - if connection_name.count(".") == 2: - return connection_name - - if connection_name.count(".") == 1: - return f"{default_project}.{connection_name}" - - if connection_name.count(".") == 0: - return f"{default_project}.{default_location}.{connection_name}" - - raise ValueError(f"Invalid connection name format: {connection_name}.") - def create_bq_connection( self, project_id: str, location: str, connection_id: str, iam_role: str ): @@ -73,12 +73,6 @@ def create_bq_connection( iam_role: str of the IAM role that the service account of the created connection needs to aquire. E.g. 'run.invoker', 'aiplatform.user' """ - # TODO(shobs): The below command to enable BigQuery Connection API needs - # to be automated. Disabling for now since most target users would not - # have the privilege to enable API in a project. 
- # log("Making sure BigQuery Connection API is enabled") - # if os.system("gcloud services enable bigqueryconnection.googleapis.com"): - # raise ValueError("Failed to enable BigQuery Connection API") # If the intended connection does not exist then create it service_account_id = self._get_service_account_if_connection_exists( project_id, location, connection_id diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 02582b17ba..6fd6fc23c2 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -14,21 +14,26 @@ from __future__ import annotations from dataclasses import dataclass +import functools import io import typing from typing import Iterable, Sequence import ibis.expr.types as ibis_types import pandas +import pyarrow as pa +import pyarrow.feather as pa_feather import bigframes.core.compile as compiling import bigframes.core.expression as ex import bigframes.core.guid import bigframes.core.join_def as join_def +import bigframes.core.local_data as local_data import bigframes.core.nodes as nodes -from bigframes.core.ordering import OrderingColumnReference +from bigframes.core.ordering import OrderingExpression import bigframes.core.ordering as orderings import bigframes.core.rewrite +import bigframes.core.schema as schemata import bigframes.core.utils from bigframes.core.window_spec import WindowSpec import bigframes.dtypes @@ -63,28 +68,32 @@ def from_ibis( node = nodes.ReadGbqNode( table=table, table_session=session, - columns=tuple(columns), + columns=tuple( + bigframes.dtypes.ibis_value_to_canonical_type(column) + for column in columns + ), hidden_ordering_columns=tuple(hidden_ordering_columns), ordering=ordering, ) return cls(node) @classmethod - def from_pandas(cls, pd_df: pandas.DataFrame, session: bigframes.Session): + def from_pyarrow(cls, arrow_table: pa.Table, session: Session): + adapted_table = local_data.adapt_pa_table(arrow_table) + schema = local_data.arrow_schema_to_bigframes(adapted_table.schema) + iobytes 
= io.BytesIO() - # Use alphanumeric identifiers, to avoid downstream problems with escaping. - as_ids = [ - bigframes.core.utils.label_to_identifier(label, strict=True) - for label in pd_df.columns - ] - unique_ids = tuple(bigframes.core.utils.disambiguate_ids(as_ids)) - pd_df.reset_index(drop=True).set_axis(unique_ids, axis=1).to_feather(iobytes) - node = nodes.ReadLocalNode(feather_bytes=iobytes.getvalue(), session=session) + pa_feather.write_feather(adapted_table, iobytes) + node = nodes.ReadLocalNode( + iobytes.getvalue(), + data_schema=schema, + session=session, + ) return cls(node) @property def column_ids(self) -> typing.Sequence[str]: - return self._compile_ordered().column_ids + return self.schema.names @property def session(self) -> Session: @@ -95,6 +104,32 @@ def session(self) -> Session: required_session if (required_session is not None) else get_global_session() ) + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + # TODO: switch to use self.node.schema + return self._compiled_schema + + @functools.cached_property + def _compiled_schema(self) -> schemata.ArraySchema: + compiled = self._compile_unordered() + items = tuple( + schemata.SchemaItem(id, compiled.get_column_type(id)) + for id in compiled.column_ids + ) + return schemata.ArraySchema(items) + + def validate_schema(self): + tree_derived = self.node.schema + ibis_derived = self._compiled_schema + if tree_derived.names != ibis_derived.names: + raise ValueError( + f"Unexpected names internal {tree_derived.names} vs compiled {ibis_derived.names}" + ) + if tree_derived.dtypes != ibis_derived.dtypes: + raise ValueError( + f"Unexpected types internal {tree_derived.dtypes} vs compiled {ibis_derived.dtypes}" + ) + def _try_evaluate_local(self): """Use only for unit testing paths - not fully featured. 
Will throw exception if fails.""" import ibis @@ -104,7 +139,7 @@ def _try_evaluate_local(self): ) def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: - return self._compile_ordered().get_column_type(key) + return self.schema.get_type(key) def _compile_ordered(self) -> compiling.OrderedIR: return compiling.compile_ordered_ir(self.node) @@ -127,7 +162,7 @@ def filter_by_id(self, predicate_id: str, keep_null: bool = False) -> ArrayValue def filter(self, predicate: ex.Expression): return ArrayValue(nodes.FilterNode(child=self.node, predicate=predicate)) - def order_by(self, by: Sequence[OrderingColumnReference]) -> ArrayValue: + def order_by(self, by: Sequence[OrderingExpression]) -> ArrayValue: return ArrayValue(nodes.OrderByNode(child=self.node, by=tuple(by))) def reversed(self) -> ArrayValue: diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index 6b9a367f55..c789b2a69c 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -176,7 +176,7 @@ def _interpolate_column( ) -> typing.Tuple[blocks.Block, str]: if interpolate_method not in ["linear", "nearest", "ffill"]: raise ValueError("interpolate method not supported") - window_ordering = (ordering.OrderingColumnReference(x_values),) + window_ordering = (ordering.OrderingExpression(ex.free_var(x_values)),) backwards_window = windows.WindowSpec(following=0, ordering=window_ordering) forwards_window = windows.WindowSpec(preceding=0, ordering=window_ordering) @@ -338,8 +338,8 @@ def value_counts( if sort: block = block.order_by( [ - ordering.OrderingColumnReference( - count_id, + ordering.OrderingExpression( + ex.free_var(count_id), direction=ordering.OrderingDirection.ASC if ascending else ordering.OrderingDirection.DESC, @@ -398,8 +398,8 @@ def rank( window = windows.WindowSpec( # BigQuery has syntax to reorder nulls with "NULLS FIRST/LAST", but that is unavailable through ibis presently, so must order on a separate nullity expression 
first. ordering=( - ordering.OrderingColumnReference( - col, + ordering.OrderingExpression( + ex.free_var(col), ordering.OrderingDirection.ASC if ascending else ordering.OrderingDirection.DESC, @@ -481,8 +481,8 @@ def nsmallest( if keep == "last": block = block.reversed() order_refs = [ - ordering.OrderingColumnReference( - col_id, direction=ordering.OrderingDirection.ASC + ordering.OrderingExpression( + ex.free_var(col_id), direction=ordering.OrderingDirection.ASC ) for col_id in column_ids ] @@ -511,8 +511,8 @@ def nlargest( if keep == "last": block = block.reversed() order_refs = [ - ordering.OrderingColumnReference( - col_id, direction=ordering.OrderingDirection.DESC + ordering.OrderingExpression( + ex.free_var(col_id), direction=ordering.OrderingDirection.DESC ) for col_id in column_ids ] @@ -804,9 +804,9 @@ def _idx_extrema( ) # Have to find the min for each order_refs = [ - ordering.OrderingColumnReference(value_col, direction), + ordering.OrderingExpression(ex.free_var(value_col), direction), *[ - ordering.OrderingColumnReference(idx_col) + ordering.OrderingExpression(ex.free_var(idx_col)) for idx_col in original_block.index_columns ], ] diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 0ebbe48cc4..afa13375b1 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -26,12 +26,12 @@ import itertools import random import typing -from typing import Iterable, List, Mapping, Optional, Sequence, Tuple +from typing import Iterable, List, Literal, Mapping, Optional, Sequence, Tuple import warnings -import bigframes_vendored.pandas.io.common as vendored_pandas_io_common import google.cloud.bigquery as bigquery import pandas as pd +import pyarrow as pa import bigframes._config.sampling_options as sampling_options import bigframes.constants as constants @@ -141,32 +141,23 @@ def __init__( self._stats_cache[" ".join(self.index_columns)] = {} @classmethod - def from_local(cls, data, session: bigframes.Session) -> Block: - pd_data = 
pd.DataFrame(data) - columns = pd_data.columns - - # Make a flattened version to treat as a table. - if len(pd_data.columns.names) > 1: - pd_data.columns = columns.to_flat_index() - + def from_local(cls, data: pd.DataFrame, session: bigframes.Session) -> Block: + # Assumes caller has already converted datatypes to bigframes ones. + pd_data = data + column_labels = pd_data.columns index_labels = list(pd_data.index.names) - # The ArrayValue layer doesn't know about indexes, so make sure indexes - # are real columns with unique IDs. - pd_data = pd_data.reset_index( - names=[f"level_{level}" for level in range(len(index_labels))] - ) - pd_data = pd_data.set_axis( - vendored_pandas_io_common.dedup_names( - list(pd_data.columns), is_potential_multiindex=False - ), - axis="columns", - ) - index_ids = pd_data.columns[: len(index_labels)] - keys_expr = core.ArrayValue.from_pandas(pd_data, session) + # unique internal ids + column_ids = [f"column_{i}" for i in range(len(pd_data.columns))] + index_ids = [f"level_{level}" for level in range(pd_data.index.nlevels)] + + pd_data = pd_data.set_axis(column_ids, axis=1) + pd_data = pd_data.reset_index(names=index_ids) + as_pyarrow = pa.Table.from_pandas(pd_data, preserve_index=False) + array_value = core.ArrayValue.from_pyarrow(as_pyarrow, session=session) return cls( - keys_expr, - column_labels=columns, + array_value, + column_labels=column_labels, index_columns=index_ids, index_labels=index_labels, ) @@ -279,7 +270,7 @@ def cols_matching_label(self, partial_label: Label) -> typing.Sequence[str]: def order_by( self, - by: typing.Sequence[ordering.OrderingColumnReference], + by: typing.Sequence[ordering.OrderingExpression], ) -> Block: return Block( self._expr.order_by(by), @@ -484,6 +475,7 @@ def _copy_index_to_pandas(self, df: pd.DataFrame): # general Sequence[Label] that BigQuery DataFrames has. 
# See: https://github.com/pandas-dev/pandas-stubs/issues/804 df.index.names = self.index.names # type: ignore + df.columns = self.column_labels def _materialize_local( self, materialize_options: MaterializationOptions = MaterializationOptions() @@ -563,7 +555,7 @@ def _downsample( block = self._split( fracs=(fraction,), random_state=random_state, - preserve_order=True, + sort=False, )[0] return block else: @@ -579,7 +571,7 @@ def _split( fracs: Iterable[float] = (), *, random_state: Optional[int] = None, - preserve_order: Optional[bool] = False, + sort: Optional[bool | Literal["random"]] = "random", ) -> List[Block]: """Internal function to support splitting Block to multiple parts along index axis. @@ -618,7 +610,9 @@ def _split( string_ordering_col, random_state_col, ops.strconcat_op ) block, hash_string_sum_col = block.apply_unary_op(string_sum_col, ops.hash_op) - block = block.order_by([ordering.OrderingColumnReference(hash_string_sum_col)]) + block = block.order_by( + [ordering.OrderingExpression(ex.free_var(hash_string_sum_col))] + ) intervals = [] cur = 0 @@ -631,9 +625,22 @@ def _split( typing.cast(Block, block.slice(start=lower, stop=upper)) for lower, upper in intervals ] - if preserve_order: + + if sort is True: sliced_blocks = [ - sliced_block.order_by([ordering.OrderingColumnReference(ordering_col)]) + sliced_block.order_by( + [ + ordering.OrderingExpression(ex.free_var(idx_col)) + for idx_col in sliced_block.index_columns + ] + ) + for sliced_block in sliced_blocks + ] + elif sort is False: + sliced_blocks = [ + sliced_block.order_by( + [ordering.OrderingExpression(ex.free_var(ordering_col))] + ) for sliced_block in sliced_blocks ] @@ -1103,13 +1110,22 @@ def summarize( index_columns=[label_col_id], ) - def corr(self): - """Returns a block object to compute the self-correlation on this block.""" + def calculate_pairwise_metric(self, op=agg_ops.CorrOp()): + """ + Returns a block object to compute pairwise metrics among all value 
columns in this block. + + The metric to be computed is specified by the `op` parameter, which can be either a + correlation operation (default) or a covariance operation. + """ + if len(self.value_columns) > 30: + raise NotImplementedError( + "This function supports dataframes with 30 columns or fewer. " + f"Provided dataframe has {len(self.value_columns)} columns. {constants.FEEDBACK_LINK}" + ) + aggregations = [ ( - ex.BinaryAggregation( - agg_ops.CorrOp(), ex.free_var(left_col), ex.free_var(right_col) - ), + ex.BinaryAggregation(op, ex.free_var(left_col), ex.free_var(right_col)), f"{left_col}-{right_col}", ) for left_col in self.value_columns @@ -1703,7 +1719,10 @@ def merge( if sort: # sort uses coalesced join keys always joined_expr = joined_expr.order_by( - [ordering.OrderingColumnReference(col_id) for col_id in coalesced_ids], + [ + ordering.OrderingExpression(ex.free_var(col_id)) + for col_id in coalesced_ids + ], ) joined_expr = joined_expr.select_columns(result_columns) @@ -2025,7 +2044,10 @@ def join_mono_indexed( ) if sort: combined_expr = combined_expr.order_by( - [ordering.OrderingColumnReference(col_id) for col_id in coalesced_join_cols] + [ + ordering.OrderingExpression(ex.free_var(col_id)) + for col_id in coalesced_join_cols + ] ) block = Block( combined_expr, @@ -2114,7 +2136,10 @@ def join_multi_indexed( ) if sort: combined_expr = combined_expr.order_by( - [ordering.OrderingColumnReference(col_id) for col_id in coalesced_join_cols] + [ + ordering.OrderingExpression(ex.free_var(col_id)) + for col_id in coalesced_join_cols + ] ) if left.index.nlevels == 1: diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 7245689aae..af2d69275a 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -15,6 +15,7 @@ import abc import functools +import itertools import textwrap import typing from typing import Collection, Iterable, Literal, Optional, Sequence @@ -31,11 +32,13 @@ import 
bigframes.core.expression as ex import bigframes.core.guid from bigframes.core.ordering import ( + ascending_over, encode_order_string, ExpressionOrdering, IntegerEncoding, - OrderingColumnReference, + OrderingExpression, ) +import bigframes.core.schema as schemata import bigframes.core.utils as utils from bigframes.core.window_spec import WindowSpec import bigframes.dtypes @@ -96,6 +99,10 @@ def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]: else None ) + @property + def _ibis_bindings(self) -> dict[str, ibis_types.Value]: + return {col: self._get_ibis_column(col) for col in self.column_ids} + @abc.abstractmethod def filter(self: T, predicate: ex.Expression) -> T: """Filter the table on a given expression, the predicate must be a boolean expression.""" @@ -242,7 +249,7 @@ def row_count(self) -> OrderedIR: ibis_table, (ibis_table["count"],), ordering=ExpressionOrdering( - ordering_value_columns=(OrderingColumnReference("count"),), + ordering_value_columns=(ascending_over("count"),), total_ordering_columns=frozenset(["count"]), ), ) @@ -456,12 +463,7 @@ def aggregate( result = table.group_by(by_column_ids).aggregate(**stats) # Must have deterministic ordering, so order by the unique "by" column ordering = ExpressionOrdering( - tuple( - [ - OrderingColumnReference(column_id=column_id) - for column_id in by_column_ids - ] - ), + tuple([ascending_over(column_id) for column_id in by_column_ids]), total_ordering_columns=frozenset(by_column_ids), ) columns = tuple(result[key] for key in result.columns) @@ -475,12 +477,10 @@ def aggregate( aggregates = {**stats, ORDER_ID_COLUMN: ibis_types.literal(0)} result = table.aggregate(**aggregates) # Ordering is irrelevant for single-row output, but set ordering id regardless as other ops(join etc.) expect it. 
+ # TODO: Maybe can make completely empty ordering = ExpressionOrdering( - ordering_value_columns=tuple( - [OrderingColumnReference(ORDER_ID_COLUMN)] - ), - total_ordering_columns=frozenset([ORDER_ID_COLUMN]), - integer_encoding=IntegerEncoding(is_encoded=True, is_sequential=True), + ordering_value_columns=tuple([]), + total_ordering_columns=frozenset([]), ) return OrderedIR( result, @@ -582,9 +582,6 @@ def __init__( predicates: Optional[Collection[ibis_types.BooleanValue]] = None, ): super().__init__(table, columns, predicates) - # TODO: Validate ordering - if not ordering.total_ordering_columns: - raise ValueError("Must have total ordering defined by one or more columns") self._ordering = ordering # Meta columns store ordering, or other data that doesn't correspond to dataframe columns self._hidden_ordering_columns = ( @@ -614,7 +611,8 @@ def __init__( all_columns = value_col_ids | hidden_col_ids ordering_valid = all( - col.column_id in all_columns for col in ordering.all_ordering_columns + set(col.scalar_expression.unbound_variables).issubset(all_columns) + for col in ordering.all_ordering_columns ) if value_col_ids & hidden_col_ids: raise ValueError( @@ -627,65 +625,42 @@ def __init__( def from_pandas( cls, pd_df: pandas.DataFrame, + schema: schemata.ArraySchema, ) -> OrderedIR: """ Builds an in-memory only (SQL only) expr from a pandas dataframe. + + Assumed that the dataframe has unique string column names and bigframes-suppported dtypes. """ - # We can't include any hidden columns in the ArrayValue constructor, so - # grab the column names before we add the hidden ordering column. - column_names = [str(column) for column in pd_df.columns] - # Make sure column names are all strings. 
- pd_df = pd_df.set_axis(column_names, axis="columns") - pd_df = pd_df.assign(**{ORDER_ID_COLUMN: range(len(pd_df))}) # ibis memtable cannot handle NA, must convert to None - pd_df = pd_df.astype("object") # type: ignore - pd_df = pd_df.where(pandas.notnull(pd_df), None) - - # NULL type isn't valid in BigQuery, so retry with an explicit schema in these cases. - keys_memtable = ibis.memtable(pd_df) - schema = keys_memtable.schema() - new_schema = [] - for column_index, column in enumerate(schema): - if column == ORDER_ID_COLUMN: - new_type: ibis_dtypes.DataType = ibis_dtypes.int64 - else: - column_type = schema[column] - # The autodetected type might not be one we can support, such - # as NULL type for empty rows, so convert to a type we do - # support. - new_type = bigframes.dtypes.bigframes_dtype_to_ibis_dtype( - bigframes.dtypes.ibis_dtype_to_bigframes_dtype(column_type) - ) - # TODO(swast): Ibis memtable doesn't use backticks in struct - # field names, so spaces and other characters aren't allowed in - # the memtable context. Blocked by - # https://github.com/ibis-project/ibis/issues/7187 - column = f"col_{column_index}" - new_schema.append((column, new_type)) - - # must set non-null column labels. 
these are not the user-facing labels - pd_df = pd_df.set_axis( - [column for column, _ in new_schema], - axis="columns", - ) - keys_memtable = ibis.memtable(pd_df, schema=ibis.schema(new_schema)) + # this destroys the schema however + ibis_values = pd_df.astype("object").where(pandas.notnull(pd_df), None) # type: ignore + ibis_values = ibis_values.assign(**{ORDER_ID_COLUMN: range(len(pd_df))}) + # derive the ibis schema from the original pandas schema + ibis_schema = [ + (name, bigframes.dtypes.bigframes_dtype_to_ibis_dtype(dtype)) + for name, dtype in zip(schema.names, schema.dtypes) + ] + ibis_schema.append((ORDER_ID_COLUMN, ibis_dtypes.int64)) + + keys_memtable = ibis.memtable(ibis_values, schema=ibis.schema(ibis_schema)) return cls( keys_memtable, - columns=[ - keys_memtable[f"col_{column_index}"].name(column) - for column_index, column in enumerate(column_names) - ], + columns=[keys_memtable[column].name(column) for column in pd_df.columns], ordering=ExpressionOrdering( - ordering_value_columns=tuple( - [OrderingColumnReference(ORDER_ID_COLUMN)] - ), + ordering_value_columns=tuple([ascending_over(ORDER_ID_COLUMN)]), total_ordering_columns=frozenset([ORDER_ID_COLUMN]), ), hidden_ordering_columns=(keys_memtable[ORDER_ID_COLUMN],), ) + @property + def _ibis_bindings(self) -> dict[str, ibis_types.Value]: + all_keys = itertools.chain(self.column_ids, self._hidden_column_ids) + return {col: self._get_any_column(col) for col in all_keys} + @property def _hidden_column_ids(self) -> typing.Sequence[str]: return tuple(self._hidden_ordering_column_names.keys()) @@ -714,7 +689,7 @@ def builder(self) -> OrderedIR.Builder: predicates=self._predicates, ) - def order_by(self, by: Sequence[OrderingColumnReference]) -> OrderedIR: + def order_by(self, by: Sequence[OrderingExpression]) -> OrderedIR: expr_builder = self.builder() expr_builder.ordering = self._ordering.with_ordering_columns(by) return expr_builder.build() @@ -755,7 +730,9 @@ def promote_offsets(self, col_id: str) 
-> OrderedIR: return self._project_offsets().promote_offsets(col_id) expr_builder = self.builder() expr_builder.columns = [ - self._get_any_column(ordering.total_order_col.column_id).name(col_id), + self._compile_expression(ordering.total_order_col.scalar_expression).name( + col_id + ), *self.columns, ] return expr_builder.build() @@ -930,7 +907,7 @@ def unpivot( ordering_value_columns=tuple( [ *old_ordering.ordering_value_columns, - OrderingColumnReference(unpivot_offset_id), + ascending_over(unpivot_offset_id), ] ), total_ordering_columns=frozenset( @@ -941,7 +918,7 @@ def unpivot( new_ordering = ExpressionOrdering( ordering_value_columns=tuple( [ - OrderingColumnReference(unpivot_offset_id), + ascending_over(unpivot_offset_id), *old_ordering.ordering_value_columns, ] ), @@ -974,9 +951,12 @@ def _reproject_to_table(self) -> OrderedIR: expose_hidden_cols=True, ) columns = [table[column_name] for column_name in self._column_names] - ordering_col_ids = [ - ref.column_id for ref in self._ordering.all_ordering_columns - ] + ordering_col_ids = list( + itertools.chain.from_iterable( + ref.scalar_expression.unbound_variables + for ref in self._ordering.all_ordering_columns + ) + ) hidden_ordering_columns = [ table[column_name] for column_name in self._hidden_ordering_column_names @@ -994,20 +974,24 @@ def to_sql( col_id_overrides: typing.Mapping[str, str] = {}, sorted: bool = False, ) -> str: - sql = ibis_bigquery.Backend().compile( - self._to_ibis_expr( - ordering_mode="unordered", - col_id_overrides=col_id_overrides, - expose_hidden_cols=sorted, - ) - ) if sorted: + # Need to bake ordering expressions into the selected column in order for our ordering clause builder to work. 
+ baked_ir = self._bake_ordering() + sql = ibis_bigquery.Backend().compile( + baked_ir._to_ibis_expr( + ordering_mode="unordered", + col_id_overrides=col_id_overrides, + expose_hidden_cols=True, + ) + ) output_columns = [ col_id_overrides.get(col) if (col in col_id_overrides) else col - for col in self.column_ids + for col in baked_ir.column_ids ] selection = ", ".join(map(lambda col_id: f"`{col_id}`", output_columns)) - order_by_clause = self._ordering_clause(self._ordering.all_ordering_columns) + order_by_clause = baked_ir._ordering_clause( + baked_ir._ordering.all_ordering_columns + ) sql = textwrap.dedent( f"SELECT {selection}\n" @@ -1016,14 +1000,29 @@ def to_sql( ")\n" f"{order_by_clause}\n" ) + else: + sql = ibis_bigquery.Backend().compile( + self._to_ibis_expr( + ordering_mode="unordered", + col_id_overrides=col_id_overrides, + expose_hidden_cols=False, + ) + ) return typing.cast(str, sql) - def _ordering_clause(self, ordering: Iterable[OrderingColumnReference]) -> str: + def _ordering_clause(self, ordering: Iterable[OrderingExpression]) -> str: parts = [] for col_ref in ordering: asc_desc = "ASC" if col_ref.direction.is_ascending else "DESC" null_clause = "NULLS LAST" if col_ref.na_last else "NULLS FIRST" - part = f"`{col_ref.column_id}` {asc_desc} {null_clause}" + ordering_expr = col_ref.scalar_expression + # We don't know how to compile scalar expressions in isolation + if ordering_expr.is_const: + # Probably shouldn't have constants in ordering definition, but best to ignore if somehow they end up here. 
+ continue + if not isinstance(ordering_expr, ex.UnboundVariableExpression): + raise ValueError("Expected direct column reference.") + part = f"`{ordering_expr.id}` {asc_desc} {null_clause}" parts.append(part) return f"ORDER BY {' ,'.join(parts)}" @@ -1135,9 +1134,12 @@ def _filter(self, predicate_value: ibis_types.BooleanValue) -> OrderedIR: def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> OrderedIR: """Safely assign by id while maintaining ordering integrity.""" # TODO: Split into explicit set and replace methods - ordering_col_ids = [ - col_ref.column_id for col_ref in self._ordering.ordering_value_columns - ] + ordering_col_ids = set( + itertools.chain.from_iterable( + col_ref.scalar_expression.unbound_variables + for col_ref in self._ordering.ordering_value_columns + ) + ) if id in ordering_col_ids: return self._hide_column(id)._set_or_replace_by_id(id, new_value) @@ -1154,9 +1156,14 @@ def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> Ordered def _select(self, values: typing.Tuple[ibis_types.Value]) -> OrderedIR: """Safely assign by id while maintaining ordering integrity.""" # TODO: Split into explicit set and replace methods - ordering_col_ids = [ - col_ref.column_id for col_ref in self._ordering.ordering_value_columns - ] + ordering_col_ids = set( + itertools.chain.from_iterable( + [ + col_ref.scalar_expression.unbound_variables + for col_ref in self._ordering.ordering_value_columns + ] + ) + ) ir = self mappings = {value.name: value for value in values} for ordering_id in ordering_col_ids: @@ -1209,17 +1216,43 @@ def _hide_column(self, column_id) -> OrderedIR: expr_builder.ordering = self._ordering.with_column_remap({column_id: new_name}) return expr_builder.build() + def _bake_ordering(self) -> OrderedIR: + """Bakes ordering expression into the selection, maybe creating hidden columns.""" + ordering_expressions = self._ordering.all_ordering_columns + new_exprs = [] + new_baked_cols = [] + for expr in 
ordering_expressions: + if isinstance(expr.scalar_expression, ex.OpExpression): + baked_column = self._compile_expression(expr.scalar_expression).name( + bigframes.core.guid.generate_guid() + ) + new_baked_cols.append(baked_column) + new_expr = OrderingExpression( + ex.free_var(baked_column.name), expr.direction, expr.na_last + ) + new_exprs.append(new_expr) + else: + new_exprs.append(expr) + + ordering = self._ordering.with_ordering_columns(new_exprs) + return OrderedIR( + self._table, + columns=self.columns, + hidden_ordering_columns=[*self._hidden_ordering_columns, *new_baked_cols], + ordering=ordering, + predicates=self._predicates, + ) + def _project_offsets(self) -> OrderedIR: """Create a new expression that contains offsets. Should only be executed when offsets are needed for an operations. Has no effect on expression semantics.""" if self._ordering.is_sequential: return self - # TODO(tbergeron): Enforce total ordering table = self._to_ibis_expr( ordering_mode="offset_col", order_col_name=ORDER_ID_COLUMN ) columns = [table[column_name] for column_name in self._column_names] ordering = ExpressionOrdering( - ordering_value_columns=tuple([OrderingColumnReference(ORDER_ID_COLUMN)]), + ordering_value_columns=tuple([ascending_over(ORDER_ID_COLUMN)]), total_ordering_columns=frozenset([ORDER_ID_COLUMN]), integer_encoding=IntegerEncoding(True, is_sequential=True), ) @@ -1247,7 +1280,9 @@ def _create_order_columns( def _create_offset_column(self) -> ibis_types.IntegerColumn: if self._ordering.total_order_col and self._ordering.is_sequential: - offsets = self._get_any_column(self._ordering.total_order_col.column_id) + offsets = self._compile_expression( + self._ordering.total_order_col.scalar_expression + ) return typing.cast(ibis_types.IntegerColumn, offsets) else: window = ibis.window(order_by=self._ibis_order) @@ -1258,8 +1293,8 @@ def _create_offset_column(self) -> ibis_types.IntegerColumn: def _create_string_ordering_column(self) -> ibis_types.StringColumn: if 
self._ordering.total_order_col and self._ordering.is_string_encoded: - string_order_ids = self._get_any_column( - self._ordering.total_order_col.column_id + string_order_ids = op_compiler.compile_expression( + self._ordering.total_order_col.scalar_expression, self._ibis_bindings ) return typing.cast(ibis_types.StringColumn, string_order_ids) if ( @@ -1267,7 +1302,9 @@ def _create_string_ordering_column(self) -> ibis_types.StringColumn: and self._ordering.integer_encoding.is_encoded ): # Special case: non-negative integer ordering id can be converted directly to string without regenerating row numbers - int_values = self._get_any_column(self._ordering.total_order_col.column_id) + int_values = self._compile_expression( + self._ordering.total_order_col.scalar_expression + ) return encode_order_string( typing.cast(ibis_types.IntegerColumn, int_values), ) @@ -1281,6 +1318,9 @@ def _create_string_ordering_column(self) -> ibis_types.StringColumn: ) return encode_order_string(row_nums) + def _compile_expression(self, expr: ex.Expression): + return op_compiler.compile_expression(expr, self._ibis_bindings) + def _ibis_window_from_spec(self, window_spec: WindowSpec, allow_ties: bool = False): group_by: typing.List[ibis_types.Value] = ( [ @@ -1354,25 +1394,25 @@ def _reduce_predicate_list( def _convert_ordering_to_table_values( value_lookup: typing.Mapping[str, ibis_types.Value], - ordering_columns: typing.Sequence[OrderingColumnReference], + ordering_columns: typing.Sequence[OrderingExpression], ) -> typing.Sequence[ibis_types.Value]: column_refs = ordering_columns ordering_values = [] for ordering_col in column_refs: - column = typing.cast(ibis_types.Column, value_lookup[ordering_col.column_id]) + expr = op_compiler.compile_expression( + ordering_col.scalar_expression, value_lookup + ) ordering_value = ( - ibis.asc(column) - if ordering_col.direction.is_ascending - else ibis.desc(column) + ibis.asc(expr) if ordering_col.direction.is_ascending else ibis.desc(expr) ) # 
Bigquery SQL considers NULLS to be "smallest" values, but we need to override in these cases. if (not ordering_col.na_last) and (not ordering_col.direction.is_ascending): # Force nulls to be first - is_null_val = typing.cast(ibis_types.Column, column.isnull()) + is_null_val = typing.cast(ibis_types.Column, expr.isnull()) ordering_values.append(ibis.desc(is_null_val)) elif (ordering_col.na_last) and (ordering_col.direction.is_ascending): # Force nulls to be last - is_null_val = typing.cast(ibis_types.Column, column.isnull()) + is_null_val = typing.cast(ibis_types.Column, expr.isnull()) ordering_values.append(ibis.asc(is_null_val)) ordering_values.append(ordering_value) return ordering_values diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index ec6c79db5f..6f10d85f31 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -41,7 +41,8 @@ def compile_peak_sql(node: nodes.BigFrameNode, n_rows: int) -> typing.Optional[s return compile_unordered_ir(node).peek_sql(n_rows) -@functools.cache +# TODO: Remove cache when schema no longer requires compilation to derive schema (and therefor only compiles for execution) +@functools.lru_cache(maxsize=5000) def compile_node( node: nodes.BigFrameNode, ordered: bool = True ) -> compiled.UnorderedIR | compiled.OrderedIR: @@ -80,7 +81,7 @@ def compile_join(node: nodes.JoinNode, ordered: bool = True): @_compile_node.register def compile_readlocal(node: nodes.ReadLocalNode, ordered: bool = True): array_as_pd = pd.read_feather(io.BytesIO(node.feather_bytes)) - ordered_ir = compiled.OrderedIR.from_pandas(array_as_pd) + ordered_ir = compiled.OrderedIR.from_pandas(array_as_pd, node.schema) if ordered: return ordered_ir else: diff --git a/bigframes/core/compile/concat.py b/bigframes/core/compile/concat.py index d39569370e..41a8f97821 100644 --- a/bigframes/core/compile/concat.py +++ b/bigframes/core/compile/concat.py @@ -20,8 +20,8 @@ import 
bigframes.core.compile.compiled as compiled from bigframes.core.ordering import ( + ascending_over, ExpressionOrdering, - OrderingColumnReference, reencode_order_string, StringEncoding, ) @@ -84,7 +84,7 @@ def concat_ordered( tables.append(table) combined_table = ibis.union(*tables) ordering = ExpressionOrdering( - ordering_value_columns=tuple([OrderingColumnReference(ORDER_ID_COLUMN)]), + ordering_value_columns=tuple([ascending_over(ORDER_ID_COLUMN)]), total_ordering_columns=frozenset([ORDER_ID_COLUMN]), string_encoding=StringEncoding(True, prefix_size + max_encoding_size), ) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index c95d1ca45e..d2fc453835 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -42,6 +42,8 @@ # Datetime constants UNIT_TO_US_CONVERSION_FACTORS = { + "W": 7 * 24 * 60 * 60 * 1000 * 1000, + "d": 24 * 60 * 60 * 1000 * 1000, "D": 24 * 60 * 60 * 1000 * 1000, "h": 60 * 60 * 1000 * 1000, "m": 60 * 1000 * 1000, @@ -622,6 +624,26 @@ def strftime_op_impl(x: ibis_types.Value, op: ops.StrftimeOp): ) +@scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True) +def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp): + supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"] + pandas_to_ibis_freqs = {"min": "m"} + if op.freq not in supported_freqs: + raise NotImplementedError( + f"Unsupported freq paramater: {op.freq}" + + " Supported freq parameters are: " + + ",".join(supported_freqs) + ) + if op.freq in pandas_to_ibis_freqs: + ibis_freq = pandas_to_ibis_freqs[op.freq] + else: + ibis_freq = op.freq + result_type = x.type() + result = typing.cast(ibis_types.TimestampValue, x) + result = result.truncate(ibis_freq) + return result.cast(result_type) + + @scalar_op_compiler.register_unary_op(ops.time_op) def time_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.TimestampValue, x).time() @@ -632,6 
+654,13 @@ def year_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.TimestampValue, x).year().cast(ibis_dtypes.int64) +@scalar_op_compiler.register_unary_op(ops.normalize_op) +def normalize_op_impl(x: ibis_types.Value): + result_type = x.type() + result = x.truncate("D") + return result.cast(result_type) + + # Parameterized ops @scalar_op_compiler.register_unary_op(ops.StructFieldOp, pass_op=True) def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp): @@ -726,12 +755,19 @@ def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp): if x.type() == ibis_dtypes.str: x = x.to_timestamp(op.format) if op.format else timestamp(x) elif x.type() == ibis_dtypes.Timestamp(timezone="UTC"): + if op.format: + raise NotImplementedError( + f"Format parameter is not supported for Timestamp input types. {constants.FEEDBACK_LINK}" + ) return x elif x.type() != ibis_dtypes.timestamp: - # The default unit is set to "ns" (nanoseconds) for consistency - # with pandas, where "ns" is the default unit for datetime operations. - unit = op.unit or "ns" - x = numeric_to_datatime(x, unit) + if op.format: + x = x.cast(ibis_dtypes.str).to_timestamp(op.format) + else: + # The default unit is set to "ns" (nanoseconds) for consistency + # with pandas, where "ns" is the default unit for datetime operations. 
+ unit = op.unit or "ns" + x = numeric_to_datatime(x, unit) return x.cast(ibis_dtypes.Timestamp(timezone="UTC" if op.utc else None)) @@ -1070,8 +1106,16 @@ def floordiv_op( ) -def _is_float(x: ibis_types.Value): - return isinstance(x, (ibis_types.FloatingColumn, ibis_types.FloatingScalar)) +def _is_bignumeric(x: ibis_types.Value): + if not isinstance(x, ibis_types.DecimalValue): + return False + # Should be exactly 76 for bignumeric + return x.precision > 70 + + +def _is_numeric(x: ibis_types.Value): + # either big-numeric or numeric + return isinstance(x, ibis_types.DecimalValue) @scalar_op_compiler.register_binary_op(ops.mod_op) @@ -1080,40 +1124,88 @@ def mod_op( x: ibis_types.Value, y: ibis_types.Value, ): - is_result_float = _is_float(x) | _is_float(y) - x_numeric = typing.cast( - ibis_types.NumericValue, - x.cast(ibis_dtypes.Decimal(precision=38, scale=9, nullable=True)) - if is_result_float - else x, - ) - y_numeric = typing.cast( - ibis_types.NumericValue, - y.cast(ibis_dtypes.Decimal(precision=38, scale=9, nullable=True)) - if is_result_float - else y, + # Hacky short-circuit to avoid passing zero-literal to sql backend, evaluate locally instead to null. 
+ op = y.op() + if isinstance(op, ibis.expr.operations.generic.Literal) and op.value == 0: + return ibis_types.null().cast(x.type()) + + if x.type().is_integer() and y.type().is_integer(): + # both are ints, no casting necessary + return _int_mod(x, y) + + else: + # bigquery doens't support float mod, so just cast to bignumeric and hope for the best + x_numeric = typing.cast( + ibis_types.DecimalValue, + x.cast(ibis_dtypes.Decimal(precision=76, scale=38, nullable=True)), + ) + y_numeric = typing.cast( + ibis_types.DecimalValue, + y.cast(ibis_dtypes.Decimal(precision=76, scale=38, nullable=True)), + ) + mod_numeric = _bignumeric_mod(x_numeric, y_numeric) + + # Cast back down based on original types + if _is_bignumeric(x) or _is_bignumeric(y): + return mod_numeric + if _is_numeric(x) or _is_numeric(y): + return mod_numeric.cast(ibis_dtypes.Decimal(38, 9)) + else: + return mod_numeric.cast(ibis_dtypes.float64) + + +def _bignumeric_mod( + x: ibis_types.IntegerValue, + y: ibis_types.IntegerValue, +): + # Hacky short-circuit to avoid passing zero-literal to sql backend, evaluate locally instead to null. + op = y.op() + if isinstance(op, ibis.expr.operations.generic.Literal) and op.value == 0: + return ibis_types.null().cast(x.type()) + + bq_mod = x % y # Bigquery will maintain x sign here + + # In BigQuery returned value has the same sign as X. In pandas, the sign of y is used, so we need to flip the result if sign(x) != sign(y) + return ( + ibis.case() + .when( + y == _ZERO, + _NAN * x, + ) # Dummy op to propogate nulls and type from x arg + .when( + (y < _ZERO) & (bq_mod > _ZERO), (y + bq_mod) + ) # Convert positive result to negative + .when( + (y > _ZERO) & (bq_mod < _ZERO), (y + bq_mod) + ) # Convert negative result to positive + .else_(bq_mod) + .end() ) + + +def _int_mod( + x: ibis_types.IntegerValue, + y: ibis_types.IntegerValue, +): # Hacky short-circuit to avoid passing zero-literal to sql backend, evaluate locally instead to null. 
op = y.op() if isinstance(op, ibis.expr.operations.generic.Literal) and op.value == 0: return ibis_types.null().cast(x.type()) - bq_mod = x_numeric % y_numeric # Bigquery will maintain x sign here - if is_result_float: - bq_mod = typing.cast(ibis_types.NumericValue, bq_mod.cast(ibis_dtypes.float64)) + bq_mod = x % y # Bigquery will maintain x sign here # In BigQuery returned value has the same sign as X. In pandas, the sign of y is used, so we need to flip the result if sign(x) != sign(y) return ( ibis.case() .when( - y_numeric == _ZERO, - _NAN * x_numeric if is_result_float else _ZERO * x_numeric, + y == _ZERO, + _ZERO * x, ) # Dummy op to propogate nulls and type from x arg .when( - (y_numeric < _ZERO) & (bq_mod > _ZERO), (y_numeric + bq_mod) + (y < _ZERO) & (bq_mod > _ZERO), (y + bq_mod) ) # Convert positive result to negative .when( - (y_numeric > _ZERO) & (bq_mod < _ZERO), (y_numeric + bq_mod) + (y > _ZERO) & (bq_mod < _ZERO), (y + bq_mod) ) # Convert negative result to positive .else_(bq_mod) .end() diff --git a/bigframes/core/compile/single_column.py b/bigframes/core/compile/single_column.py index 31ebf87d17..f1a3d723ac 100644 --- a/bigframes/core/compile/single_column.py +++ b/bigframes/core/compile/single_column.py @@ -179,12 +179,10 @@ def join_orderings( left_order_dominates: bool = True, ) -> orderings.ExpressionOrdering: left_ordering_refs = [ - ref.with_name(left_id_mapping[ref.column_id]) - for ref in left.all_ordering_columns + ref.remap_names(left_id_mapping) for ref in left.all_ordering_columns ] right_ordering_refs = [ - ref.with_name(right_id_mapping[ref.column_id]) - for ref in right.all_ordering_columns + ref.remap_names(right_id_mapping) for ref in right.all_ordering_columns ] if left_order_dominates: joined_refs = [*left_ordering_refs, *right_ordering_refs] diff --git a/bigframes/core/expression.py b/bigframes/core/expression.py index 4c2ae461fd..8c3f52d22b 100644 --- a/bigframes/core/expression.py +++ b/bigframes/core/expression.py @@ -39,6 
+39,12 @@ class Aggregation(abc.ABC): op: agg_ops.WindowOp = dataclasses.field() + @abc.abstractmethod + def output_type( + self, input_types: dict[str, dtypes.ExpressionType] + ) -> dtypes.ExpressionType: + ... + @dataclasses.dataclass(frozen=True) class UnaryAggregation(Aggregation): @@ -47,6 +53,11 @@ class UnaryAggregation(Aggregation): UnboundVariableExpression, ScalarConstantExpression ] = dataclasses.field() + def output_type( + self, input_types: dict[str, bigframes.dtypes.Dtype] + ) -> dtypes.ExpressionType: + return self.op.output_type(self.arg.output_type(input_types)) + @dataclasses.dataclass(frozen=True) class BinaryAggregation(Aggregation): @@ -58,6 +69,13 @@ class BinaryAggregation(Aggregation): UnboundVariableExpression, ScalarConstantExpression ] = dataclasses.field() + def output_type( + self, input_types: dict[str, bigframes.dtypes.Dtype] + ) -> dtypes.ExpressionType: + return self.op.output_type( + self.left.output_type(input_types), self.right.output_type(input_types) + ) + @dataclasses.dataclass(frozen=True) class Expression(abc.ABC): @@ -67,7 +85,7 @@ class Expression(abc.ABC): def unbound_variables(self) -> typing.Tuple[str, ...]: return () - def rename(self, name_mapping: dict[str, str]) -> Expression: + def rename(self, name_mapping: Mapping[str, str]) -> Expression: return self @property @@ -86,6 +104,10 @@ def bind_all_variables(self, bindings: Mapping[str, Expression]) -> Expression: """Replace all variables with expression given in `bindings`.""" ... 
+ @property + def is_bijective(self) -> bool: + return False + @dataclasses.dataclass(frozen=True) class ScalarConstantExpression(Expression): @@ -107,6 +129,11 @@ def output_type( def bind_all_variables(self, bindings: Mapping[str, Expression]) -> Expression: return self + @property + def is_bijective(self) -> bool: + # () <-> value + return True + @dataclasses.dataclass(frozen=True) class UnboundVariableExpression(Expression): @@ -118,7 +145,7 @@ class UnboundVariableExpression(Expression): def unbound_variables(self) -> typing.Tuple[str, ...]: return (self.id,) - def rename(self, name_mapping: dict[str, str]) -> Expression: + def rename(self, name_mapping: Mapping[str, str]) -> Expression: if self.id in name_mapping: return UnboundVariableExpression(name_mapping[self.id]) else: @@ -134,7 +161,7 @@ def output_type( if self.id in input_types: return input_types[self.id] else: - raise ValueError("Type of variable has not been fixed.") + raise ValueError(f"Type of variable {self.id} has not been fixed.") def bind_all_variables(self, bindings: Mapping[str, Expression]) -> Expression: if self.id in bindings.keys(): @@ -142,6 +169,10 @@ def bind_all_variables(self, bindings: Mapping[str, Expression]) -> Expression: else: raise ValueError(f"Variable {self.id} remains unbound") + @property + def is_bijective(self) -> bool: + return True + @dataclasses.dataclass(frozen=True) class OpExpression(Expression): @@ -161,7 +192,7 @@ def unbound_variables(self) -> typing.Tuple[str, ...]: ) ) - def rename(self, name_mapping: dict[str, str]) -> Expression: + def rename(self, name_mapping: Mapping[str, str]) -> Expression: return OpExpression( self.op, tuple(input.rename(name_mapping) for input in self.inputs) ) @@ -183,3 +214,8 @@ def bind_all_variables(self, bindings: Mapping[str, Expression]) -> Expression: self.op, tuple(input.bind_all_variables(bindings) for input in self.inputs), ) + + @property + def is_bijective(self) -> bool: + # TODO: Mark individual functions as 
bijective? + return False diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 2b447a0190..e2b28553c6 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -220,7 +220,7 @@ def rolling(self, window: int, min_periods=None) -> windows.Window: min_periods=min_periods or window, ) block = self._block.order_by( - [order.OrderingColumnReference(col) for col in self._by_col_ids], + [order.ascending_over(col) for col in self._by_col_ids], ) return windows.Window( block, window_spec, self._selected_cols, drop_null_groups=self._dropna @@ -233,7 +233,7 @@ def expanding(self, min_periods: int = 1) -> windows.Window: min_periods=min_periods, ) block = self._block.order_by( - [order.OrderingColumnReference(col) for col in self._by_col_ids], + [order.ascending_over(col) for col in self._by_col_ids], ) return windows.Window( block, window_spec, self._selected_cols, drop_null_groups=self._dropna @@ -573,7 +573,7 @@ def rolling(self, window: int, min_periods=None) -> windows.Window: min_periods=min_periods or window, ) block = self._block.order_by( - [order.OrderingColumnReference(col) for col in self._by_col_ids], + [order.ascending_over(col) for col in self._by_col_ids], ) return windows.Window( block, @@ -590,7 +590,7 @@ def expanding(self, min_periods: int = 1) -> windows.Window: min_periods=min_periods, ) block = self._block.order_by( - [order.OrderingColumnReference(col) for col in self._by_col_ids], + [order.ascending_over(col) for col in self._by_col_ids], ) return windows.Window( block, diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 8d6a1cbdfe..da6f3f3740 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -402,7 +402,8 @@ def _iloc_getitem_series_or_dataframe( pd.Series, ]: if isinstance(key, int): - internal_slice_result = series_or_dataframe._slice(key, key + 1, 1) + stop_key = key + 1 if key != -1 else None + internal_slice_result = 
series_or_dataframe._slice(key, stop_key, 1) result_pd_df = internal_slice_result.to_pandas() if result_pd_df.empty: raise IndexError("single positional indexer is out-of-bounds") diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/index.py index 958b742636..c818b68711 100644 --- a/bigframes/core/indexes/index.py +++ b/bigframes/core/indexes/index.py @@ -261,13 +261,12 @@ def transpose(self) -> Index: def sort_values(self, *, ascending: bool = True, na_position: str = "last"): if na_position not in ["first", "last"]: raise ValueError("Param na_position must be one of 'first' or 'last'") - direction = ( - order.OrderingDirection.ASC if ascending else order.OrderingDirection.DESC - ) na_last = na_position == "last" index_columns = self._block.index_columns ordering = [ - order.OrderingColumnReference(column, direction=direction, na_last=na_last) + order.ascending_over(column, na_last) + if ascending + else order.descending_over(column, na_last) for column in index_columns ] return Index(self._block.order_by(ordering)) @@ -303,13 +302,8 @@ def argmax(self) -> int: block, row_nums = self._block.promote_offsets() block = block.order_by( [ - *[ - order.OrderingColumnReference( - col, direction=order.OrderingDirection.DESC - ) - for col in self._block.index_columns - ], - order.OrderingColumnReference(row_nums), + *[order.descending_over(col) for col in self._block.index_columns], + order.ascending_over(row_nums), ] ) import bigframes.series as series @@ -320,11 +314,8 @@ def argmin(self) -> int: block, row_nums = self._block.promote_offsets() block = block.order_by( [ - *[ - order.OrderingColumnReference(col) - for col in self._block.index_columns - ], - order.OrderingColumnReference(row_nums), + *[order.ascending_over(col) for col in self._block.index_columns], + order.ascending_over(row_nums), ] ) import bigframes.series as series diff --git a/bigframes/core/joins/merge.py b/bigframes/core/joins/merge.py index c65e1bdd54..1542cda0af 100644 --- 
a/bigframes/core/joins/merge.py +++ b/bigframes/core/joins/merge.py @@ -18,15 +18,18 @@ from __future__ import annotations +import typing from typing import Literal, Optional -from bigframes.dataframe import DataFrame -from bigframes.series import Series +# Avoid cirular imports. +if typing.TYPE_CHECKING: + import bigframes.dataframe + import bigframes.series def merge( - left: DataFrame, - right: DataFrame, + left: bigframes.dataframe.DataFrame, + right: bigframes.dataframe.DataFrame, how: Literal[ "inner", "left", @@ -40,7 +43,7 @@ def merge( right_on: Optional[str] = None, sort: bool = False, suffixes: tuple[str, str] = ("_x", "_y"), -) -> DataFrame: +) -> bigframes.dataframe.DataFrame: left = _validate_operand(left) right = _validate_operand(right) @@ -55,14 +58,19 @@ def merge( ) -def _validate_operand(obj: DataFrame | Series) -> DataFrame: - if isinstance(obj, DataFrame): +def _validate_operand( + obj: bigframes.dataframe.DataFrame | bigframes.series.Series, +) -> bigframes.dataframe.DataFrame: + import bigframes.dataframe + import bigframes.series + + if isinstance(obj, bigframes.dataframe.DataFrame): return obj - elif isinstance(obj, Series): + elif isinstance(obj, bigframes.series.Series): if obj.name is None: - raise ValueError("Cannot merge a Series without a name") + raise ValueError("Cannot merge a bigframes.series.Series without a name") return obj.to_frame() else: raise TypeError( - f"Can only merge Series or DataFrame objects, a {type(obj)} was passed" + f"Can only merge bigframes.series.Series or bigframes.dataframe.DataFrame objects, a {type(obj)} was passed" ) diff --git a/bigframes/core/local_data.py b/bigframes/core/local_data.py new file mode 100644 index 0000000000..8b256be6d2 --- /dev/null +++ b/bigframes/core/local_data.py @@ -0,0 +1,63 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Methods that deal with local pandas/pyarrow dataframes.""" + +from __future__ import annotations + +import pyarrow as pa + +import bigframes.core.schema as schemata +import bigframes.dtypes + + +def arrow_schema_to_bigframes(arrow_schema: pa.Schema) -> schemata.ArraySchema: + """Infer the corresponding bigframes schema given a pyarrow schema.""" + schema_items = tuple( + schemata.SchemaItem( + field.name, + bigframes_type_for_arrow_type(field.type), + ) + for field in arrow_schema + ) + return schemata.ArraySchema(schema_items) + + +def adapt_pa_table(arrow_table: pa.Table) -> pa.Table: + """Adapt a pyarrow table to one that can be handled by bigframes. 
Converts tz to UTC and unit to us for temporal types.""" + new_schema = pa.schema( + [ + pa.field(field.name, arrow_type_replacements(field.type)) + for field in arrow_table.schema + ] + ) + return arrow_table.cast(new_schema) + + +def bigframes_type_for_arrow_type(pa_type: pa.DataType) -> bigframes.dtypes.Dtype: + return bigframes.dtypes.ibis_dtype_to_bigframes_dtype( + bigframes.dtypes.arrow_dtype_to_ibis_dtype(arrow_type_replacements(pa_type)) + ) + + +def arrow_type_replacements(type: pa.DataType) -> pa.DataType: + if pa.types.is_timestamp(type): + # This is potentially lossy, but BigFrames doesn't support ns + new_tz = "UTC" if (type.tz is not None) else None + return pa.timestamp(unit="us", tz=new_tz) + if pa.types.is_time64(type): + # This is potentially lossy, but BigFrames doesn't support ns + return pa.time64("us") + else: + return type diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 9da535e15f..5ebd2a5997 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -14,6 +14,7 @@ from __future__ import annotations +import abc from dataclasses import dataclass, field, fields import functools import itertools @@ -24,8 +25,9 @@ import bigframes.core.expression as ex import bigframes.core.guid -from bigframes.core.join_def import JoinDefinition -from bigframes.core.ordering import OrderingColumnReference +from bigframes.core.join_def import JoinColumnMapping, JoinDefinition, JoinSide +from bigframes.core.ordering import OrderingExpression +import bigframes.core.schema as schemata import bigframes.core.window_spec as window import bigframes.dtypes import bigframes.operations.aggregations as agg_ops @@ -100,6 +102,11 @@ def roots(self) -> typing.Set[BigFrameNode]: ) return set(roots) + @property + @abc.abstractmethod + def schema(self) -> schemata.ArraySchema: + ... 
+ @dataclass(frozen=True) class UnaryNode(BigFrameNode): @@ -109,6 +116,10 @@ class UnaryNode(BigFrameNode): def child_nodes(self) -> typing.Sequence[BigFrameNode]: return (self.child,) + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + return self.child.schema + @dataclass(frozen=True) class JoinNode(BigFrameNode): @@ -138,11 +149,34 @@ def peekable(self) -> bool: single_root = len(self.roots) == 1 return children_peekable and single_root + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + def join_mapping_to_schema_item(mapping: JoinColumnMapping): + result_id = mapping.destination_id + result_dtype = ( + self.left_child.schema.get_type(mapping.source_id) + if mapping.source_table == JoinSide.LEFT + else self.right_child.schema.get_type(mapping.source_id) + ) + return schemata.SchemaItem(result_id, result_dtype) + + items = tuple( + join_mapping_to_schema_item(mapping) for mapping in self.join.mappings + ) + return schemata.ArraySchema(items) + @dataclass(frozen=True) class ConcatNode(BigFrameNode): children: Tuple[BigFrameNode, ...] + def __post_init__(self): + if len(self.children) == 0: + raise ValueError("Concat requires at least one input table. Zero provided.") + child_schemas = [child.schema.dtypes for child in self.children] + if not len(set(child_schemas)) == 1: + raise ValueError("All inputs must have identical dtypes. 
{child_schemas}") + @property def child_nodes(self) -> typing.Sequence[BigFrameNode]: return self.children @@ -150,11 +184,21 @@ def child_nodes(self) -> typing.Sequence[BigFrameNode]: def __hash__(self): return self._node_hash + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + # TODO: Output names should probably be aligned beforehand or be part of concat definition + items = tuple( + schemata.SchemaItem(f"column_{i}", dtype) + for i, dtype in enumerate(self.children[0].schema.dtypes) + ) + return schemata.ArraySchema(items) + # Input Nodex @dataclass(frozen=True) class ReadLocalNode(BigFrameNode): feather_bytes: bytes + data_schema: schemata.ArraySchema session: typing.Optional[bigframes.session.Session] = None def __hash__(self): @@ -168,6 +212,10 @@ def peekable(self) -> bool: def roots(self) -> typing.Set[BigFrameNode]: return {self} + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + return self.data_schema + # TODO: Refactor to take raw gbq object reference @dataclass(frozen=True) @@ -193,6 +241,17 @@ def peekable(self) -> bool: def roots(self) -> typing.Set[BigFrameNode]: return {self} + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + items = tuple( + schemata.SchemaItem( + value.get_name(), + bigframes.dtypes.ibis_dtype_to_bigframes_dtype(value.type()), + ) + for value in self.columns + ) + return schemata.ArraySchema(items) + # Unary nodes @dataclass(frozen=True) @@ -210,6 +269,12 @@ def peekable(self) -> bool: def non_local(self) -> bool: return False + @property + def schema(self) -> schemata.ArraySchema: + return self.child.schema.prepend( + schemata.SchemaItem(self.col_id, bigframes.dtypes.INT_DTYPE) + ) + @dataclass(frozen=True) class FilterNode(UnaryNode): @@ -225,7 +290,16 @@ def __hash__(self): @dataclass(frozen=True) class OrderByNode(UnaryNode): - by: Tuple[OrderingColumnReference, ...] + by: Tuple[OrderingExpression, ...] 
+ + def __post_init__(self): + available_variables = self.child.schema.names + for order_expr in self.by: + for variable in order_expr.scalar_expression.unbound_variables: + if variable not in available_variables: + raise ValueError( + f"Cannot over unknown id:{variable}, columns are {available_variables}" + ) def __hash__(self): return self._node_hash @@ -247,6 +321,17 @@ class ProjectionNode(UnaryNode): def __hash__(self): return self._node_hash + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + input_types = self.child.schema._mapping + items = tuple( + schemata.SchemaItem( + id, bigframes.dtypes.dtype_for_etype(ex.output_type(input_types)) + ) + for ex, id in self.assignments + ) + return schemata.ArraySchema(items) + # TODO: Merge RowCount into Aggregate Node? # Row count can be compute from table metadata sometimes, so it is a bit special. @@ -260,6 +345,12 @@ def row_preserving(self) -> bool: def non_local(self) -> bool: return True + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + return schemata.ArraySchema( + (schemata.SchemaItem("count", bigframes.dtypes.INT_DTYPE),) + ) + @dataclass(frozen=True) class AggregateNode(UnaryNode): @@ -282,6 +373,21 @@ def peekable(self) -> bool: def non_local(self) -> bool: return True + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + by_items = tuple( + schemata.SchemaItem(id, self.child.schema.get_type(id)) + for id in self.by_column_ids + ) + input_types = self.child.schema._mapping + agg_items = tuple( + schemata.SchemaItem( + id, bigframes.dtypes.dtype_for_etype(agg.output_type(input_types)) + ) + for agg, id in self.aggregations + ) + return schemata.ArraySchema(tuple([*by_items, *agg_items])) + @dataclass(frozen=True) class WindowOpNode(UnaryNode): @@ -303,6 +409,18 @@ def peekable(self) -> bool: def non_local(self) -> bool: return True + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + input_type = 
self.child.schema.get_type(self.column_name) + new_item_dtype = self.op.output_type(input_type) + if self.output_name is None: + return self.child.schema.update_dtype(self.column_name, new_item_dtype) + if self.output_name in self.child.schema.names: + return self.child.schema.update_dtype(self.output_name, new_item_dtype) + return self.child.schema.append( + schemata.SchemaItem(self.output_name, new_item_dtype) + ) + @dataclass(frozen=True) class ReprojectOpNode(UnaryNode): @@ -312,6 +430,7 @@ def __hash__(self): @dataclass(frozen=True) class UnpivotNode(UnaryNode): + # TODO: Refactor unpivot row_labels: typing.Tuple[typing.Hashable, ...] unpivot_columns: typing.Tuple[ typing.Tuple[str, typing.Tuple[typing.Optional[str], ...]], ... @@ -338,6 +457,47 @@ def non_local(self) -> bool: def peekable(self) -> bool: return False + @functools.cached_property + def schema(self) -> schemata.ArraySchema: + def infer_dtype( + values: typing.Iterable[typing.Hashable], + ) -> bigframes.dtypes.Dtype: + item_types = map(lambda x: bigframes.dtypes.infer_literal_type(x), values) + etype = functools.reduce( + lambda t1, t2: bigframes.dtypes.lcd_type(t1, t2) + if (t1 and t2) + else None, + item_types, + ) + return bigframes.dtypes.dtype_for_etype(etype) + + label_tuples = [ + label if isinstance(label, tuple) else (label,) for label in self.row_labels + ] + idx_dtypes = [ + infer_dtype(map(lambda x: typing.cast(tuple, x)[i], label_tuples)) + for i in range(len(self.index_col_ids)) + ] + + index_items = [ + schemata.SchemaItem(id, dtype) + for id, dtype in zip(self.index_col_ids, idx_dtypes) + ] + value_dtypes = ( + self.dtype + if isinstance(self.dtype, tuple) + else (self.dtype,) * len(self.unpivot_columns) + ) + value_items = [ + schemata.SchemaItem(col[0], dtype) + for col, dtype in zip(self.unpivot_columns, value_dtypes) + ] + passthrough_items = [ + schemata.SchemaItem(id, self.child.schema.get_type(id)) + for id in self.passthrough_columns + ] + return 
schemata.ArraySchema((*index_items, *value_items, *passthrough_items)) + @dataclass(frozen=True) class RandomSampleNode(UnaryNode): diff --git a/bigframes/core/ordering.py b/bigframes/core/ordering.py index bbfc7cf9d8..2543a3b722 100644 --- a/bigframes/core/ordering.py +++ b/bigframes/core/ordering.py @@ -18,7 +18,7 @@ from enum import Enum import math import typing -from typing import Optional, Sequence +from typing import Mapping, Optional, Sequence, Set import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types @@ -46,40 +46,28 @@ def is_ascending(self) -> bool: return self == OrderingDirection.ASC -@dataclass(frozen=True) -class OrderingColumnReference: - """References a column and how to order with respect to values in that column.""" - - column_id: str - direction: OrderingDirection = OrderingDirection.ASC - na_last: bool = True - - def with_name(self, name: str) -> OrderingColumnReference: - return OrderingColumnReference(name, self.direction, self.na_last) - - def with_reverse(self) -> OrderingColumnReference: - return OrderingColumnReference( - self.column_id, self.direction.reverse(), not self.na_last - ) - - @dataclass(frozen=True) class OrderingExpression: - """ - An expression that defines a scalar value to order, a direction and a null behavior. Maps directly to ORDER BY expressions in GoogleSQL. - This is more of OrderingColumnReference which order on a previously projected column id instead of any scalar expression. - """ + """References a column and how to order with respect to values in that column.""" - # TODO: Right now, expression trees requires projecting a value before it can be sorted on. If OrderByNode used this instead, we could avoid some such projections and simplify the tree. 
scalar_expression: expression.Expression direction: OrderingDirection = OrderingDirection.ASC na_last: bool = True - def remap_names(self, mapping: dict[str, str]) -> OrderingExpression: + def remap_names(self, mapping: Mapping[str, str]) -> OrderingExpression: return OrderingExpression( self.scalar_expression.rename(mapping), self.direction, self.na_last ) + def bind_variables( + self, mapping: Mapping[str, expression.Expression] + ) -> OrderingExpression: + return OrderingExpression( + self.scalar_expression.bind_all_variables(mapping), + self.direction, + self.na_last, + ) + def with_reverse(self) -> OrderingExpression: return OrderingExpression( self.scalar_expression, self.direction.reverse(), not self.na_last @@ -109,7 +97,7 @@ class IntegerEncoding: class ExpressionOrdering: """Immutable object that holds information about the ordering of rows in a ArrayValue object.""" - ordering_value_columns: typing.Tuple[OrderingColumnReference, ...] = () + ordering_value_columns: typing.Tuple[OrderingExpression, ...] = () integer_encoding: IntegerEncoding = IntegerEncoding(False) string_encoding: StringEncoding = StringEncoding(False) # A table has a total ordering defined by the identities of a set of 1 or more columns. @@ -120,7 +108,7 @@ class ExpressionOrdering: @classmethod def from_offset_col(cls, col: str) -> ExpressionOrdering: return ExpressionOrdering( - (OrderingColumnReference(col),), + (ascending_over(col),), integer_encoding=IntegerEncoding(True, is_sequential=True), total_ordering_columns=frozenset({col}), ) @@ -143,7 +131,7 @@ def with_non_sequential(self): def with_ordering_columns( self, - ordering_value_columns: Sequence[OrderingColumnReference] = (), + ordering_value_columns: Sequence[OrderingExpression] = (), ) -> ExpressionOrdering: """Creates a new ordering that reorders by the given columns. 
@@ -154,18 +142,10 @@ def with_ordering_columns( Returns: Modified ExpressionOrdering """ - col_ids_new = [ - ordering_ref.column_id for ordering_ref in ordering_value_columns - ] - old_ordering_keep = [ - ordering_ref - for ordering_ref in self.ordering_value_columns - if ordering_ref.column_id not in col_ids_new - ] # Truncate to remove any unneded col references after all total order cols included new_ordering = self._truncate_ordering( - (*ordering_value_columns, *old_ordering_keep) + (*ordering_value_columns, *self.ordering_value_columns) ) return ExpressionOrdering( new_ordering, @@ -173,15 +153,20 @@ def with_ordering_columns( ) def _truncate_ordering( - self, order_refs: tuple[OrderingColumnReference, ...] - ) -> tuple[OrderingColumnReference, ...]: - total_order_cols_remaining = set(self.total_ordering_columns) - for i in range(len(order_refs)): - column = order_refs[i].column_id - if column in total_order_cols_remaining: - total_order_cols_remaining.remove(column) - if len(total_order_cols_remaining) == 0: - return order_refs[: i + 1] + self, order_refs: tuple[OrderingExpression, ...] 
+ ) -> tuple[OrderingExpression, ...]: + # Truncate once we refer to a full key in bijective operations + must_see = set(self.total_ordering_columns) + columns_seen: Set[str] = set() + truncated_refs = [] + for order_part in order_refs: + expr = order_part.scalar_expression + if not set(expr.unbound_variables).issubset(columns_seen): + if expr.is_bijective: + columns_seen.update(expr.unbound_variables) + truncated_refs.append(order_part) + if columns_seen.issuperset(must_see): + return tuple(truncated_refs) raise ValueError("Ordering did not contain all total_order_cols") def with_reverse(self): @@ -193,8 +178,7 @@ def with_reverse(self): def with_column_remap(self, mapping: typing.Mapping[str, str]): new_value_columns = [ - col.with_name(mapping.get(col.column_id, col.column_id)) - for col in self.ordering_value_columns + col.remap_names(mapping) for col in self.all_ordering_columns ] new_total_order = frozenset( mapping.get(col_id, col_id) for col_id in self.total_ordering_columns @@ -207,7 +191,7 @@ def with_column_remap(self, mapping: typing.Mapping[str, str]): ) @property - def total_order_col(self) -> Optional[OrderingColumnReference]: + def total_order_col(self) -> Optional[OrderingExpression]: """Returns column id of columns that defines total ordering, if such as column exists""" if len(self.ordering_value_columns) != 1: return None @@ -226,7 +210,7 @@ def is_sequential(self) -> bool: return self.integer_encoding.is_encoded and self.integer_encoding.is_sequential @property - def all_ordering_columns(self) -> Sequence[OrderingColumnReference]: + def all_ordering_columns(self) -> Sequence[OrderingExpression]: return list(self.ordering_value_columns) @@ -252,3 +236,14 @@ def reencode_order_string( ibis_types.StringColumn, (typing.cast(ibis_types.StringValue, order_id).lpad(length, "0")), ) + + +# Convenience functions +def ascending_over(id: str, nulls_last: bool = True) -> OrderingExpression: + return OrderingExpression(expression.free_var(id), 
na_last=nulls_last) + + +def descending_over(id: str, nulls_last: bool = True) -> OrderingExpression: + return OrderingExpression( + expression.free_var(id), direction=OrderingDirection.DESC, na_last=nulls_last + ) diff --git a/bigframes/core/reshape/__init__.py b/bigframes/core/reshape/__init__.py index ffbba10936..e3ed8edd21 100644 --- a/bigframes/core/reshape/__init__.py +++ b/bigframes/core/reshape/__init__.py @@ -171,7 +171,7 @@ def qcut( agg_ops.QcutOp(q), # type: ignore window_spec=core.WindowSpec( grouping_keys=(nullity_id,), - ordering=(order.OrderingColumnReference(x._value_column),), + ordering=(order.ascending_over(x._value_column),), ), ) block, result = block.project_expr( diff --git a/bigframes/core/rewrite.py b/bigframes/core/rewrite.py index a518108f4a..61fe28b7b5 100644 --- a/bigframes/core/rewrite.py +++ b/bigframes/core/rewrite.py @@ -16,10 +16,9 @@ import dataclasses import functools import itertools -from typing import Optional, Sequence, Tuple +from typing import Mapping, Optional, Sequence, Tuple import bigframes.core.expression as scalar_exprs -import bigframes.core.guid as guids import bigframes.core.join_def as join_defs import bigframes.core.nodes as nodes import bigframes.core.ordering as order @@ -54,22 +53,24 @@ def from_node(cls, node: nodes.BigFrameNode) -> SquashedSelect: ) return cls(node, selection, None, ()) + @property + def column_lookup(self) -> Mapping[str, scalar_exprs.Expression]: + return {col_id: expr for expr, col_id in self.columns} + def project( self, projection: Tuple[Tuple[scalar_exprs.Expression, str], ...] 
) -> SquashedSelect: - lookup = {id: expr for expr, id in self.columns} new_columns = tuple( - (expr.bind_all_variables(lookup), id) for expr, id in projection + (expr.bind_all_variables(self.column_lookup), id) for expr, id in projection ) return SquashedSelect(self.root, new_columns, self.predicate, self.ordering) def filter(self, predicate: scalar_exprs.Expression) -> SquashedSelect: - lookup = {id: expr for expr, id in self.columns} if self.predicate is None: - new_predicate = predicate.bind_all_variables(lookup) + new_predicate = predicate.bind_all_variables(self.column_lookup) else: new_predicate = ops.and_op.as_expr( - self.predicate, predicate.bind_all_variables(lookup) + self.predicate, predicate.bind_all_variables(self.column_lookup) ) return SquashedSelect(self.root, self.columns, new_predicate, self.ordering) @@ -77,15 +78,11 @@ def reverse(self) -> SquashedSelect: new_ordering = tuple(expr.with_reverse() for expr in self.ordering) return SquashedSelect(self.root, self.columns, self.predicate, new_ordering) - def order_with(self, by: Tuple[order.OrderingColumnReference, ...]): - exprs_by_id = {id: expr for expr, id in self.columns} - as_order_exprs = [ - order.OrderingExpression( - exprs_by_id[ref.column_id], ref.direction, ref.na_last - ) - for ref in by + def order_with(self, by: Tuple[order.OrderingExpression, ...]): + adjusted_orderings = [ + order_part.bind_variables(self.column_lookup) for order_part in by ] - new_ordering = (*as_order_exprs, *self.ordering) + new_ordering = (*adjusted_orderings, *self.ordering) return SquashedSelect(self.root, self.columns, self.predicate, new_ordering) def maybe_join( @@ -165,37 +162,12 @@ def maybe_join( def expand(self) -> nodes.BigFrameNode: # Safest to apply predicates first, as it may filter out inputs that cannot be handled by other expressions - root = ( - nodes.FilterNode(child=self.root, predicate=self.predicate) - if self.predicate - else self.root - ) + root = self.root + if self.predicate: + root = 
nodes.FilterNode(child=root, predicate=self.predicate) if self.ordering: - # Need this clumsy 3-node expansion as OrderByNode doesn't support expressions (yet?) - # Could also directly compile this whole class directly - ordering_assignments = [ - (ref.scalar_expression, guids.generate_guid()) for ref in self.ordering - ] - as_ordering_refs = tuple( - order.OrderingColumnReference(id, ref.direction, ref.na_last) - for ref, (_, id) in zip(self.ordering, ordering_assignments) - ) - extended_projection = nodes.ProjectionNode( - child=root, assignments=(*self.columns, *ordering_assignments) - ) - ordered_node = nodes.OrderByNode( - child=extended_projection, by=as_ordering_refs - ) - drop_ordering_selection = tuple( - (scalar_exprs.UnboundVariableExpression(id), id) - for _, id in self.columns - ) - pruned_node = nodes.ProjectionNode( - child=ordered_node, assignments=drop_ordering_selection - ) - return pruned_node - else: - return nodes.ProjectionNode(child=root, assignments=self.columns) + root = nodes.OrderByNode(child=root, by=self.ordering) + return nodes.ProjectionNode(child=root, assignments=self.columns) def maybe_rewrite_join(join_node: nodes.JoinNode) -> nodes.BigFrameNode: diff --git a/bigframes/core/schema.py b/bigframes/core/schema.py new file mode 100644 index 0000000000..3629778aaf --- /dev/null +++ b/bigframes/core/schema.py @@ -0,0 +1,71 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from dataclasses import dataclass +import functools +import typing + +import bigframes.core.guid +import bigframes.dtypes + +ColumnIdentifierType = str + + +@dataclass(frozen=True) +class SchemaItem: + column: ColumnIdentifierType + dtype: bigframes.dtypes.Dtype + + +@dataclass(frozen=True) +class ArraySchema: + items: typing.Tuple[SchemaItem, ...] + + @property + def names(self) -> typing.Tuple[str, ...]: + return tuple(item.column for item in self.items) + + @property + def dtypes(self) -> typing.Tuple[bigframes.dtypes.Dtype, ...]: + return tuple(item.dtype for item in self.items) + + @functools.cached_property + def _mapping(self) -> typing.Dict[ColumnIdentifierType, bigframes.dtypes.Dtype]: + return {item.column: item.dtype for item in self.items} + + def drop(self, columns: typing.Iterable[str]) -> ArraySchema: + return ArraySchema( + tuple(item for item in self.items if item.column not in columns) + ) + + def append(self, item: SchemaItem): + return ArraySchema(tuple([*self.items, item])) + + def prepend(self, item: SchemaItem): + return ArraySchema(tuple([item, *self.items])) + + def update_dtype( + self, id: ColumnIdentifierType, dtype: bigframes.dtypes.Dtype + ) -> ArraySchema: + return ArraySchema( + tuple( + SchemaItem(id, dtype) if item.column == id else item + for item in self.items + ) + ) + + def get_type(self, id: ColumnIdentifierType): + return self._mapping[id] diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 4aaf320c7a..96bf556101 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -73,6 +73,14 @@ def to_datetime( f"String and Timestamp requires utc=True. 
{constants.FEEDBACK_LINK}" ) + if format and unit and arg.dtype in ("Int64", "Float64"): # type: ignore + raise ValueError("cannot specify both format and unit") + + if unit and arg.dtype not in ("Int64", "Float64"): # type: ignore + raise NotImplementedError( + f"Unit parameter is not supported for non-numerical input types. {constants.FEEDBACK_LINK}" + ) + return arg._apply_unary_op( # type: ignore ops.ToDatetimeOp( utc=utc, diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py index 1976ec1e39..97c5ef03e5 100644 --- a/bigframes/core/utils.py +++ b/bigframes/core/utils.py @@ -70,7 +70,9 @@ def split_index( def get_standardized_ids( - col_labels: Iterable[Hashable], idx_labels: Iterable[Hashable] = () + col_labels: Iterable[Hashable], + idx_labels: Iterable[Hashable] = (), + strict: bool = False, ) -> tuple[list[str], list[str]]: """Get stardardized column ids as column_ids_list, index_ids_list. The standardized_column_id must be valid BQ SQL schema column names, can only be string type and unique. @@ -84,11 +86,15 @@ def get_standardized_ids( Tuple of (standardized_column_ids, standardized_index_ids) """ col_ids = [ - UNNAMED_COLUMN_ID if col_label is None else label_to_identifier(col_label) + UNNAMED_COLUMN_ID + if col_label is None + else label_to_identifier(col_label, strict=strict) for col_label in col_labels ] idx_ids = [ - UNNAMED_INDEX_ID if idx_label is None else label_to_identifier(idx_label) + UNNAMED_INDEX_ID + if idx_label is None + else label_to_identifier(idx_label, strict=strict) for idx_label in idx_labels ] @@ -107,6 +113,7 @@ def label_to_identifier(label: typing.Hashable, strict: bool = False) -> str: # Column values will be loaded as null if the column name has spaces. 
# https://github.com/googleapis/python-bigquery/issues/1566 identifier = str(label).replace(" ", "_") + if strict: identifier = re.sub(r"[^a-zA-Z0-9_]", "", identifier) if not identifier: diff --git a/bigframes/core/window_spec.py b/bigframes/core/window_spec.py index 3458bfb1b8..b02f13d333 100644 --- a/bigframes/core/window_spec.py +++ b/bigframes/core/window_spec.py @@ -29,7 +29,7 @@ class WindowSpec: """ grouping_keys: typing.Tuple[str, ...] = tuple() - ordering: typing.Tuple[orderings.OrderingColumnReference, ...] = tuple() + ordering: typing.Tuple[orderings.OrderingExpression, ...] = tuple() preceding: typing.Optional[int] = None following: typing.Optional[int] = None min_periods: int = 0 diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 0f99a3e4db..07dae2c53b 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -17,6 +17,7 @@ from __future__ import annotations import datetime +import os import re import sys import textwrap @@ -69,10 +70,10 @@ if typing.TYPE_CHECKING: import bigframes.session + SingleItemValue = Union[bigframes.series.Series, int, float, Callable] LevelType = typing.Hashable LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]] -SingleItemValue = Union[bigframes.series.Series, int, float, Callable] ERROR_IO_ONLY_GS_PATHS = f"Only Google Cloud Storage (gs://...) paths are supported. {constants.FEEDBACK_LINK}" ERROR_IO_REQUIRES_WILDCARD = ( @@ -173,6 +174,11 @@ def __init__( self._block = bigframes.pandas.read_pandas(pd_dataframe)._get_block() self._query_job: Optional[bigquery.QueryJob] = None + # Runs strict validations to ensure internal type predictions and ibis are completely in sync + # Do not execute these validations outside of testing suite. 
+ if "PYTEST_CURRENT_TEST" in os.environ: + self._block.expr.validate_schema() + def __dir__(self): return dir(type(self)) + [ label @@ -1013,17 +1019,21 @@ def corr(self, method="pearson", min_periods=None, numeric_only=False) -> DataFr raise NotImplementedError( f"min_periods not yet supported. {constants.FEEDBACK_LINK}" ) - if len(self.columns) > 30: - raise NotImplementedError( - f"Only work with dataframes containing fewer than 30 columns. Current: {len(self.columns)}. {constants.FEEDBACK_LINK}" - ) if not numeric_only: frame = self._raise_on_non_numeric("corr") else: frame = self._drop_non_numeric() - return DataFrame(frame._block.corr()) + return DataFrame(frame._block.calculate_pairwise_metric(op=agg_ops.CorrOp())) + + def cov(self, *, numeric_only: bool = False) -> DataFrame: + if not numeric_only: + frame = self._raise_on_non_numeric("corr") + else: + frame = self._drop_non_numeric() + + return DataFrame(frame._block.calculate_pairwise_metric(agg_ops.CovOp())) def to_pandas( self, @@ -1061,6 +1071,7 @@ def to_pandas( downsampled rows and all columns of this DataFrame. """ # TODO(orrbradford): Optimize this in future. 
Potentially some cases where we can return the stored query job + df, query_job = self._block.to_pandas( max_download_size=max_download_size, sampling_method=sampling_method, @@ -1438,13 +1449,12 @@ def sort_index( ) -> DataFrame: if na_position not in ["first", "last"]: raise ValueError("Param na_position must be one of 'first' or 'last'") - direction = ( - order.OrderingDirection.ASC if ascending else order.OrderingDirection.DESC - ) na_last = na_position == "last" index_columns = self._block.index_columns ordering = [ - order.OrderingColumnReference(column, direction=direction, na_last=na_last) + order.ascending_over(column, na_last) + if ascending + else order.descending_over(column, na_last) for column in index_columns ] return DataFrame(self._block.order_by(ordering)) @@ -1474,16 +1484,12 @@ def sort_values( ordering = [] for i in range(len(sort_labels)): column_id = sort_column_ids[i] - direction = ( - order.OrderingDirection.ASC - if sort_directions[i] - else order.OrderingDirection.DESC - ) + is_ascending = sort_directions[i] na_last = na_position == "last" ordering.append( - order.OrderingColumnReference( - column_id, direction=direction, na_last=na_last - ) + order.ascending_over(column_id, na_last) + if is_ascending + else order.descending_over(column_id, na_last) ) return DataFrame(self._block.order_by(ordering)) @@ -2497,6 +2503,7 @@ def sample( frac: Optional[float] = None, *, random_state: Optional[int] = None, + sort: Optional[bool | Literal["random"]] = "random", ) -> DataFrame: if n is not None and frac is not None: raise ValueError("Only one of 'n' or 'frac' parameter can be specified.") @@ -2504,7 +2511,9 @@ def sample( ns = (n,) if n is not None else () fracs = (frac,) if frac is not None else () return DataFrame( - self._block._split(ns=ns, fracs=fracs, random_state=random_state)[0] + self._block._split( + ns=ns, fracs=fracs, random_state=random_state, sort=sort + )[0] ) def _split( diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 
d78a88dfeb..63adc059f3 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -47,6 +47,9 @@ # None represents the type of a None scalar. ExpressionType = typing.Optional[Dtype] +# Used when storing Null expressions +DEFAULT_DTYPE = pd.Float64Dtype() + INT_DTYPE = pd.Int64Dtype() FLOAT_DTYPE = pd.Float64Dtype() BOOL_DTYPE = pd.BooleanDtype() @@ -191,6 +194,13 @@ } +def dtype_for_etype(etype: ExpressionType) -> Dtype: + if etype is None: + return DEFAULT_DTYPE + else: + return etype + + def ibis_dtype_to_bigframes_dtype( ibis_dtype: ibis_dtypes.DataType, ) -> Dtype: @@ -222,6 +232,12 @@ def ibis_dtype_to_bigframes_dtype( if ibis_dtype in IBIS_TO_BIGFRAMES: return IBIS_TO_BIGFRAMES[ibis_dtype] + elif isinstance(ibis_dtype, ibis_dtypes.Decimal): + # Temporary workaround for ibis decimal issue (b/323387826) + if ibis_dtype.precision >= 76: + return pd.ArrowDtype(pa.decimal256(76, 38)) + else: + return pd.ArrowDtype(pa.decimal128(38, 9)) elif isinstance(ibis_dtype, ibis_dtypes.Null): # Fallback to STRING for NULL values for most flexibility in SQL. return IBIS_TO_BIGFRAMES[ibis_dtypes.string] @@ -280,6 +296,9 @@ def arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType: if arrow_dtype in ARROW_TO_IBIS: return ARROW_TO_IBIS[arrow_dtype] + if arrow_dtype == pa.null(): + # Used for empty local dataframes where pyarrow has null type + return ibis_dtypes.float64 else: raise ValueError( f"Unexpected Arrow data type {arrow_dtype}. 
{constants.FEEDBACK_LINK}" @@ -627,6 +646,14 @@ def infer_literal_type(literal) -> typing.Optional[Dtype]: return ibis_dtype_to_bigframes_dtype(ibis_literal.type()) +def infer_literal_arrow_type(literal) -> typing.Optional[pa.DataType]: + if pd.isna(literal): + return None # Null value without a definite type + # Temporary logic, use ibis inferred type + ibis_literal = literal_to_ibis_scalar(literal) + return ibis_dtype_to_arrow_dtype(ibis_literal.type()) + + # Input and output types supported by BigQuery DataFrames remote functions. # TODO(shobs): Extend the support to all types supported by BQ remote functions # https://cloud.google.com/bigquery/docs/remote-functions#limitations @@ -655,6 +682,17 @@ def ibis_type_from_python_type(t: type) -> ibis_dtypes.DataType: def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType: + """Convert bq type to ibis. Only to be used for remote functions, does not handle all types.""" if tk not in SUPPORTED_IO_BIGQUERY_TYPEKINDS: raise UnsupportedTypeError(tk, SUPPORTED_IO_BIGQUERY_TYPEKINDS) return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk) + + +def bf_type_from_type_kind(bf_schema) -> Dict[str, Dtype]: + """Converts bigquery sql type to the default bigframes dtype.""" + ibis_schema: ibis.Schema = third_party_ibis_bqtypes.BigQuerySchema.to_ibis( + bf_schema + ) + return { + name: ibis_dtype_to_bigframes_dtype(type) for name, type in ibis_schema.items() + } diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index 09a9d97869..178c911591 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -126,9 +126,8 @@ def __init__( bq_location, bq_dataset, bq_client, - bq_connection_client, bq_connection_id, - cloud_resource_manager_client, + bq_connection_manager, cloud_function_service_account, cloud_function_kms_key_name, cloud_function_docker_repository, @@ -140,9 +139,7 @@ def __init__( 
self._bq_dataset = bq_dataset self._bq_client = bq_client self._bq_connection_id = bq_connection_id - self._bq_connection_manager = clients.BqConnectionManager( - bq_connection_client, cloud_resource_manager_client - ) + self._bq_connection_manager = bq_connection_manager self._cloud_function_service_account = cloud_function_service_account self._cloud_function_kms_key_name = cloud_function_kms_key_name self._cloud_function_docker_repository = cloud_function_docker_repository @@ -152,12 +149,13 @@ def create_bq_remote_function( ): """Create a BigQuery remote function given the artifacts of a user defined function and the http endpoint of a corresponding cloud function.""" - self._bq_connection_manager.create_bq_connection( - self._gcp_project_id, - self._bq_location, - self._bq_connection_id, - "run.invoker", - ) + if self._bq_connection_manager: + self._bq_connection_manager.create_bq_connection( + self._gcp_project_id, + self._bq_location, + self._bq_connection_id, + "run.invoker", + ) # Create BQ function # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function_2 @@ -784,7 +782,7 @@ def remote_function( if not bigquery_connection: bigquery_connection = session._bq_connection # type: ignore - bigquery_connection = clients.BqConnectionManager.resolve_full_connection_name( + bigquery_connection = clients.resolve_full_bq_connection_name( bigquery_connection, default_project=dataset_ref.project, default_location=bq_location, @@ -816,6 +814,8 @@ def remote_function( " For more details see https://cloud.google.com/functions/docs/securing/cmek#before_you_begin" ) + bq_connection_manager = None if session is None else session.bqconnectionmanager + def wrapper(f): if not callable(f): raise TypeError("f must be callable, got {}".format(f)) @@ -832,9 +832,8 @@ def wrapper(f): bq_location, dataset_ref.dataset_id, bigquery_client, - bigquery_connection_client, bq_connection_id, - 
resource_manager_client, + bq_connection_manager, cloud_function_service_account, cloud_function_kms_key_name, cloud_function_docker_repository, @@ -849,6 +848,7 @@ def wrapper(f): packages, ) + # TODO: Move ibis logic to compiler step node = ibis.udf.scalar.builtin( f, name=rf_name, @@ -859,6 +859,9 @@ def wrapper(f): remote_function_client.get_cloud_function_fully_qualified_name(cf_name) ) node.bigframes_remote_function = str(dataset_ref.routine(rf_name)) # type: ignore + node.output_dtype = bigframes.dtypes.ibis_dtype_to_bigframes_dtype( + ibis_signature.output_type + ) return node return wrapper @@ -913,6 +916,7 @@ def read_gbq_function( def node(*ignored_args, **ignored_kwargs): f"""Remote function {str(routine_ref)}.""" + # TODO: Move ibis logic to compiler step node.__name__ = routine_ref.routine_id node = ibis.udf.scalar.builtin( node, @@ -921,4 +925,7 @@ def node(*ignored_args, **ignored_kwargs): signature=(ibis_signature.input_types, ibis_signature.output_type), ) node.bigframes_remote_function = str(routine_ref) # type: ignore + node.output_dtype = bigframes.dtypes.ibis_dtype_to_bigframes_dtype( # type: ignore + ibis_signature.output_type + ) return node diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index c294d1f424..1035def54d 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import Dict, List, Optional, Union +from typing import List, Literal, Optional, Union import bigframes_vendored.sklearn.cluster._kmeans from google.cloud import bigquery @@ -27,6 +27,15 @@ from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +_BQML_PARAMS_MAPPING = { + "n_clusters": "numClusters", + "init": "kmeansInitializationMethod", + "init_col": "kmeansInitializationColumn", + "distance_type": "distanceType", + "max_iter": "maxIterations", + "tol": "minRelativeProgress", +} + @log_adapter.class_logger class KMeans( @@ -36,8 +45,24 @@ class KMeans( 
__doc__ = bigframes_vendored.sklearn.cluster._kmeans.KMeans.__doc__ - def __init__(self, n_clusters: int = 8): + def __init__( + self, + n_clusters: int = 8, + *, + init: Literal["kmeans++", "random", "custom"] = "kmeans++", + init_col: Optional[str] = None, + distance_type: Literal["euclidean", "cosine"] = "euclidean", + max_iter: int = 20, + tol: float = 0.01, + warm_start: bool = False, + ): self.n_clusters = n_clusters + self.init = init + self.init_col = init_col + self.distance_type = distance_type + self.max_iter = max_iter + self.tol = tol + self.warm_start = warm_start self._bqml_model: Optional[core.BqmlModel] = None self._bqml_model_factory = globals.bqml_model_factory() @@ -45,21 +70,42 @@ def __init__(self, n_clusters: int = 8): def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> KMeans: assert model.model_type == "KMEANS" - kwargs = {} + kwargs: dict = {} # See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun last_fitting = model.training_runs[-1]["trainingOptions"] - if "numClusters" in last_fitting: - kwargs["n_clusters"] = int(last_fitting["numClusters"]) + dummy_kmeans = cls() + for bf_param, bf_value in dummy_kmeans.__dict__.items(): + bqml_param = _BQML_PARAMS_MAPPING.get(bf_param) + if bqml_param in last_fitting: + # Convert types + kwargs[bf_param] = ( + str(last_fitting[bqml_param]) + if bf_param in ["init"] + else type(bf_value)(last_fitting[bqml_param]) + ) new_kmeans = cls(**kwargs) new_kmeans._bqml_model = core.BqmlModel(session, model) return new_kmeans @property - def _bqml_options(self) -> Dict[str, str | int | float | List[str]]: + def _bqml_options(self) -> dict: """The model options as they will be set for BQML""" - return {"model_type": "KMEANS", "num_clusters": self.n_clusters} + options = { + "model_type": "KMEANS", + "num_clusters": self.n_clusters, + "KMEANS_INIT_METHOD": self.init, + "DISTANCE_TYPE": self.distance_type, + "MAX_ITERATIONS": self.max_iter, + 
"MIN_REL_PROGRESS": self.tol, + "WARM_START": self.warm_start, + } + + if self.init_col is not None: + options["KMEANS_INIT_COL"] = self.init_col + + return options def _fit( self, diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 9dc60be78f..475b4a046f 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import List, Optional, Union +from typing import List, Literal, Optional, Union import bigframes_vendored.sklearn.decomposition._pca from google.cloud import bigquery @@ -35,8 +35,14 @@ class PCA( ): __doc__ = bigframes_vendored.sklearn.decomposition._pca.PCA.__doc__ - def __init__(self, n_components: int = 3): + def __init__( + self, + n_components: Optional[Union[int, float]] = None, + *, + svd_solver: Literal["full", "randomized", "auto"] = "auto", + ): self.n_components = n_components + self.svd_solver = svd_solver self._bqml_model: Optional[core.BqmlModel] = None self._bqml_model_factory = globals.bqml_model_factory() @@ -44,17 +50,37 @@ def __init__(self, n_components: int = 3): def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> PCA: assert model.model_type == "PCA" - kwargs = {} + kwargs: dict = {} # See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun last_fitting = model.training_runs[-1]["trainingOptions"] if "numPrincipalComponents" in last_fitting: kwargs["n_components"] = int(last_fitting["numPrincipalComponents"]) + if "pcaExplainedVarianceRatio" in last_fitting: + kwargs["n_components"] = float(last_fitting["pcaExplainedVarianceRatio"]) + if "pcaSolver" in last_fitting: + kwargs["svd_solver"] = str(last_fitting["pcaSolver"]) new_pca = cls(**kwargs) new_pca._bqml_model = core.BqmlModel(session, model) return new_pca + @property + def _bqml_options(self) -> dict: + """The model options as they will be set for BQML""" + options: dict = { + "model_type": "PCA", + 
"pca_solver": self.svd_solver, + } + + assert self.n_components is not None + if 0 < self.n_components < 1: + options["pca_explained_variance_ratio"] = float(self.n_components) + elif self.n_components >= 1: + options["num_principal_components"] = int(self.n_components) + + return options + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], @@ -63,13 +89,13 @@ def _fit( ) -> PCA: (X,) = utils.convert_to_dataframe(X) + # To mimic sklearn's behavior + if self.n_components is None: + self.n_components = min(X.shape) self._bqml_model = self._bqml_model_factory.create_model( X_train=X, transforms=transforms, - options={ - "model_type": "PCA", - "num_principal_components": self.n_components, - }, + options=self._bqml_options, ) return self diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 23b227de67..72ea600c58 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -31,7 +31,6 @@ _BQML_PARAMS_MAPPING = { "booster": "boosterType", "tree_method": "treeMethod", - "early_stop": "earlyStop", "colsample_bytree": "colsampleBylevel", "colsample_bylevel": "colsampleBytree", "colsample_bynode": "colsampleBynode", @@ -40,8 +39,8 @@ "reg_alpha": "l1Regularization", "reg_lambda": "l2Regularization", "learning_rate": "learnRate", - "min_rel_progress": "minRelativeProgress", - "num_parallel_tree": "numParallelTree", + "tol": "minRelativeProgress", + "n_estimators": "numParallelTree", "min_tree_child_weight": "minTreeChildWeight", "max_depth": "maxTreeDepth", "max_iterations": "maxIterations", @@ -57,7 +56,7 @@ class XGBRegressor( def __init__( self, - num_parallel_tree: int = 1, + n_estimators: int = 1, *, booster: Literal["gbtree", "dart"] = "gbtree", dart_normalized_type: Literal["tree", "forest"] = "tree", @@ -71,14 +70,13 @@ def __init__( subsample: float = 1.0, reg_alpha: float = 0.0, reg_lambda: float = 1.0, - early_stop: float = True, learning_rate: float = 0.3, max_iterations: int = 20, - min_rel_progress: float = 0.01, + tol: float = 
0.01, enable_global_explain: bool = False, xgboost_version: Literal["0.9", "1.1"] = "0.9", ): - self.num_parallel_tree = num_parallel_tree + self.n_estimators = n_estimators self.booster = booster self.dart_normalized_type = dart_normalized_type self.tree_method = tree_method @@ -91,10 +89,9 @@ def __init__( self.subsample = subsample self.reg_alpha = reg_alpha self.reg_lambda = reg_lambda - self.early_stop = early_stop self.learning_rate = learning_rate self.max_iterations = max_iterations - self.min_rel_progress = min_rel_progress + self.tol = tol self.enable_global_explain = enable_global_explain self.xgboost_version = xgboost_version self._bqml_model: Optional[core.BqmlModel] = None @@ -127,7 +124,8 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: return { "model_type": "BOOSTED_TREE_REGRESSOR", "data_split_method": "NO_SPLIT", - "num_parallel_tree": self.num_parallel_tree, + "early_stop": True, + "num_parallel_tree": self.n_estimators, "booster_type": self.booster, "tree_method": self.tree_method, "min_tree_child_weight": self.min_tree_child_weight, @@ -139,10 +137,9 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "subsample": self.subsample, "l1_reg": self.reg_alpha, "l2_reg": self.reg_lambda, - "early_stop": self.early_stop, "learn_rate": self.learning_rate, "max_iterations": self.max_iterations, - "min_rel_progress": self.min_rel_progress, + "min_rel_progress": self.tol, "enable_global_explain": self.enable_global_explain, "xgboost_version": self.xgboost_version, } @@ -215,7 +212,7 @@ class XGBClassifier( def __init__( self, - num_parallel_tree: int = 1, + n_estimators: int = 1, *, booster: Literal["gbtree", "dart"] = "gbtree", dart_normalized_type: Literal["tree", "forest"] = "tree", @@ -229,14 +226,13 @@ def __init__( subsample: float = 1.0, reg_alpha: float = 0.0, reg_lambda: float = 1.0, - early_stop: bool = True, learning_rate: float = 0.3, max_iterations: int = 20, - min_rel_progress: float 
= 0.01, + tol: float = 0.01, enable_global_explain: bool = False, xgboost_version: Literal["0.9", "1.1"] = "0.9", ): - self.num_parallel_tree = num_parallel_tree + self.n_estimators = n_estimators self.booster = booster self.dart_normalized_type = dart_normalized_type self.tree_method = tree_method @@ -249,10 +245,9 @@ def __init__( self.subsample = subsample self.reg_alpha = reg_alpha self.reg_lambda = reg_lambda - self.early_stop = early_stop self.learning_rate = learning_rate self.max_iterations = max_iterations - self.min_rel_progress = min_rel_progress + self.tol = tol self.enable_global_explain = enable_global_explain self.xgboost_version = xgboost_version self._bqml_model: Optional[core.BqmlModel] = None @@ -285,7 +280,8 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: return { "model_type": "BOOSTED_TREE_CLASSIFIER", "data_split_method": "NO_SPLIT", - "num_parallel_tree": self.num_parallel_tree, + "early_stop": True, + "num_parallel_tree": self.n_estimators, "booster_type": self.booster, "tree_method": self.tree_method, "min_tree_child_weight": self.min_tree_child_weight, @@ -297,10 +293,9 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "subsample": self.subsample, "l1_reg": self.reg_alpha, "l2_reg": self.reg_lambda, - "early_stop": self.early_stop, "learn_rate": self.learning_rate, "max_iterations": self.max_iterations, - "min_rel_progress": self.min_rel_progress, + "min_rel_progress": self.tol, "enable_global_explain": self.enable_global_explain, "xgboost_version": self.xgboost_version, } @@ -371,7 +366,7 @@ class RandomForestRegressor( def __init__( self, - num_parallel_tree: int = 100, + n_estimators: int = 100, *, tree_method: Literal["auto", "exact", "approx", "hist"] = "auto", min_tree_child_weight: int = 1, @@ -383,12 +378,11 @@ def __init__( subsample=0.8, reg_alpha=0.0, reg_lambda=1.0, - early_stop=True, - min_rel_progress=0.01, + tol=0.01, enable_global_explain=False, xgboost_version: 
Literal["0.9", "1.1"] = "0.9", ): - self.num_parallel_tree = num_parallel_tree + self.n_estimators = n_estimators self.tree_method = tree_method self.min_tree_child_weight = min_tree_child_weight self.colsample_bytree = colsample_bytree @@ -399,8 +393,7 @@ def __init__( self.subsample = subsample self.reg_alpha = reg_alpha self.reg_lambda = reg_lambda - self.early_stop = early_stop - self.min_rel_progress = min_rel_progress + self.tol = tol self.enable_global_explain = enable_global_explain self.xgboost_version = xgboost_version self._bqml_model: Optional[core.BqmlModel] = None @@ -432,7 +425,8 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: """The model options as they will be set for BQML""" return { "model_type": "RANDOM_FOREST_REGRESSOR", - "num_parallel_tree": self.num_parallel_tree, + "early_stop": True, + "num_parallel_tree": self.n_estimators, "tree_method": self.tree_method, "min_tree_child_weight": self.min_tree_child_weight, "colsample_bytree": self.colsample_bytree, @@ -443,8 +437,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "subsample": self.subsample, "l1_reg": self.reg_alpha, "l2_reg": self.reg_lambda, - "early_stop": self.early_stop, - "min_rel_progress": self.min_rel_progress, + "min_rel_progress": self.tol, "data_split_method": "NO_SPLIT", "enable_global_explain": self.enable_global_explain, "xgboost_version": self.xgboost_version, @@ -536,7 +529,7 @@ class RandomForestClassifier( def __init__( self, - num_parallel_tree: int = 100, + n_estimators: int = 100, *, tree_method: Literal["auto", "exact", "approx", "hist"] = "auto", min_tree_child_weight: int = 1, @@ -548,12 +541,11 @@ def __init__( subsample: float = 0.8, reg_alpha: float = 0.0, reg_lambda: float = 1.0, - early_stop=True, - min_rel_progress: float = 0.01, + tol: float = 0.01, enable_global_explain=False, xgboost_version: Literal["0.9", "1.1"] = "0.9", ): - self.num_parallel_tree = num_parallel_tree + self.n_estimators = 
n_estimators self.tree_method = tree_method self.min_tree_child_weight = min_tree_child_weight self.colsample_bytree = colsample_bytree @@ -564,8 +556,7 @@ def __init__( self.subsample = subsample self.reg_alpha = reg_alpha self.reg_lambda = reg_lambda - self.early_stop = early_stop - self.min_rel_progress = min_rel_progress + self.tol = tol self.enable_global_explain = enable_global_explain self.xgboost_version = xgboost_version self._bqml_model: Optional[core.BqmlModel] = None @@ -597,7 +588,8 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: """The model options as they will be set for BQML""" return { "model_type": "RANDOM_FOREST_CLASSIFIER", - "num_parallel_tree": self.num_parallel_tree, + "early_stop": True, + "num_parallel_tree": self.n_estimators, "tree_method": self.tree_method, "min_tree_child_weight": self.min_tree_child_weight, "colsample_bytree": self.colsample_bytree, @@ -608,8 +600,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "subsample": self.subsample, "l1_reg": self.reg_alpha, "l2_reg": self.reg_lambda, - "early_stop": self.early_stop, - "min_rel_progress": self.min_rel_progress, + "min_rel_progress": self.tol, "data_split_method": "NO_SPLIT", "enable_global_explain": self.enable_global_explain, "xgboost_version": self.xgboost_version, diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index 18380328c7..292389dcbb 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -16,7 +16,7 @@ from __future__ import annotations -from typing import Dict, List, Optional, Union +from typing import List, Optional, Union from google.cloud import bigquery @@ -25,12 +25,108 @@ from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +_BQML_PARAMS_MAPPING = { + "horizon": "horizon", + "auto_arima": "autoArima", + "auto_arima_max_order": "autoArimaMaxOrder", + "auto_arima_min_order": "autoArimaMinOrder", + "order": "nonSeasonalOrder", 
+ "data_frequency": "dataFrequency", + "holiday_region": "holidayRegion", + "clean_spikes_and_dips": "cleanSpikesAndDips", + "adjust_step_changes": "adjustStepChanges", + "time_series_length_fraction": "timeSeriesLengthFraction", + "min_time_series_length": "minTimeSeriesLength", + "max_time_series_length": "maxTimeSeriesLength", + "decompose_time_series": "decomposeTimeSeries", + "trend_smoothing_window_size": "trendSmoothingWindowSize", +} + @log_adapter.class_logger class ARIMAPlus(base.SupervisedTrainablePredictor): - """Time Series ARIMA Plus model.""" + """Time Series ARIMA Plus model. + + Args: + horizon (int, default 1,000): + The number of time points to forecast. Defaults to 1,000; the maximum value is 10,000. + + auto_arima (bool, default True): + Determines whether the training process uses auto.ARIMA or not. If True, training automatically finds the best non-seasonal order (that is, the p, d, q tuple) and decides whether or not to include a linear drift term when d is 1. + + auto_arima_max_order (int or None, default None): + The maximum value for the sum of non-seasonal p and q. + + auto_arima_min_order (int or None, default None): + The minimum value for the sum of non-seasonal p and q. + + data_frequency (str, default "auto_frequency"): + The data frequency of the input time series. + Possible values are "auto_frequency", "per_minute", "hourly", "daily", "weekly", "monthly", "quarterly", "yearly". + + include_drift (bool, default False): + Determines whether the model should include a linear drift term or not. The drift term is applicable when non-seasonal d is 1. + + holiday_region (str or None, default None): + The geographical region based on which the holiday effect is applied in modeling. By default, holiday effect modeling isn't used. + For possible values, see https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-time-series#holiday_region.
+ + clean_spikes_and_dips (bool, default True): + Determines whether or not to perform automatic spikes and dips detection and cleanup in the model training pipeline. The spikes and dips are replaced with local linear interpolated values when they're detected. + + adjust_step_changes (bool, default True): + Determines whether or not to perform automatic step change detection and adjustment in the model training pipeline. + + time_series_length_fraction (float or None, default None): + The fraction of the interpolated length of the time series that's used to model the time series trend component. All of the time points of the time series are used to model the non-trend component. + + min_time_series_length (int or None, default None): + The minimum number of time points that are used in modeling the trend component of the time series. + + max_time_series_length (int or None, default None): + The maximum number of time points in a time series that can be used in modeling the trend component of the time series. + + trend_smoothing_window_size (int or None, default None): + The smoothing window size for the trend component. + + decompose_time_series (bool, default True): + Determines whether the separate components of both the history and forecast parts of the time series (such as holiday effect and seasonal components) are saved in the model. 
+ """ + + def __init__( + self, + *, + horizon: int = 1000, + auto_arima: bool = True, + auto_arima_max_order: Optional[int] = None, + auto_arima_min_order: Optional[int] = None, + data_frequency: str = "auto_frequency", + include_drift: bool = False, + holiday_region: Optional[str] = None, + clean_spikes_and_dips: bool = True, + adjust_step_changes: bool = True, + time_series_length_fraction: Optional[float] = None, + min_time_series_length: Optional[int] = None, + max_time_series_length: Optional[int] = None, + trend_smoothing_window_size: Optional[int] = None, + decompose_time_series: bool = True, + ): + self.horizon = horizon + self.auto_arima = auto_arima + self.auto_arima_max_order = auto_arima_max_order + self.auto_arima_min_order = auto_arima_min_order + self.data_frequency = data_frequency + self.include_drift = include_drift + self.holiday_region = holiday_region + self.clean_spikes_and_dips = clean_spikes_and_dips + self.adjust_step_changes = adjust_step_changes + self.time_series_length_fraction = time_series_length_fraction + self.min_time_series_length = min_time_series_length + self.max_time_series_length = max_time_series_length + self.trend_smoothing_window_size = trend_smoothing_window_size + self.decompose_time_series = decompose_time_series + # TODO(garrettwu) add order and seasonalities params, which need struct/array - def __init__(self): self._bqml_model: Optional[core.BqmlModel] = None self._bqml_model_factory = globals.bqml_model_factory() @@ -38,16 +134,65 @@ def __init__(self): def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> ARIMAPlus: assert model.model_type == "ARIMA_PLUS" - kwargs: Dict[str, str | int | bool | float | List[str]] = {} + kwargs: dict = {} + last_fitting = model.training_runs[-1]["trainingOptions"] + + dummy_arima = cls() + for bf_param, bf_value in dummy_arima.__dict__.items(): + bqml_param = _BQML_PARAMS_MAPPING.get(bf_param) + if bqml_param in last_fitting: + # Convert types + if bf_param in 
["time_series_length_fraction"]: + kwargs[bf_param] = float(last_fitting[bqml_param]) + elif bf_param in [ + "auto_arima_max_order", + "auto_arima_min_order", + "min_time_series_length", + "max_time_series_length", + "trend_smoothing_window_size", + ]: + kwargs[bf_param] = int(last_fitting[bqml_param]) + elif bf_param in ["holiday_region"]: + kwargs[bf_param] = str(last_fitting[bqml_param]) + else: + kwargs[bf_param] = type(bf_value)(last_fitting[bqml_param]) new_arima_plus = cls(**kwargs) new_arima_plus._bqml_model = core.BqmlModel(session, model) return new_arima_plus @property - def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: + def _bqml_options(self) -> dict: """The model options as they will be set for BQML.""" - return {"model_type": "ARIMA_PLUS"} + options = { + "model_type": "ARIMA_PLUS", + "horizon": self.horizon, + "auto_arima": self.auto_arima, + "data_frequency": self.data_frequency, + "clean_spikes_and_dips": self.clean_spikes_and_dips, + "adjust_step_changes": self.adjust_step_changes, + "decompose_time_series": self.decompose_time_series, + } + + if self.auto_arima_max_order is not None: + options["auto_arima_max_order"] = self.auto_arima_max_order + if self.auto_arima_min_order is not None: + options["auto_arima_min_order"] = self.auto_arima_min_order + if self.holiday_region is not None: + options["holiday_region"] = self.holiday_region + if self.time_series_length_fraction is not None: + options["time_series_length_fraction"] = self.time_series_length_fraction + if self.min_time_series_length is not None: + options["min_time_series_length"] = self.min_time_series_length + if self.max_time_series_length is not None: + options["max_time_series_length"] = self.max_time_series_length + if self.trend_smoothing_window_size is not None: + options["trend_smoothing_window_size"] = self.trend_smoothing_window_size + + if self.include_drift: + options["include_drift"] = True + + return options def _fit( self, diff --git 
a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 68d1e12676..c0abe77b9f 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -35,11 +35,10 @@ "l1_reg": "l1Regularization", "l2_reg": "l2Regularization", "max_iterations": "maxIterations", - "learn_rate_strategy": "learnRateStrategy", - "learn_rate": "learnRate", - "early_stop": "earlyStop", - "min_rel_progress": "minRelativeProgress", - "ls_init_learn_rate": "initialLearnRate", + "learning_rate_strategy": "learnRateStrategy", + "learning_rate": "learnRate", + "tol": "minRelativeProgress", + "ls_init_learning_rate": "initialLearnRate", "warm_start": "warmStart", "calculate_p_values": "calculatePValues", "enable_global_explain": "enableGlobalExplain", @@ -59,25 +58,29 @@ def __init__( *, optimize_strategy: Literal[ "auto_strategy", "batch_gradient_descent", "normal_equation" - ] = "normal_equation", + ] = "auto_strategy", fit_intercept: bool = True, + l1_reg: Optional[float] = None, l2_reg: float = 0.0, max_iterations: int = 20, - learn_rate_strategy: Literal["line_search", "constant"] = "line_search", - early_stop: bool = True, - min_rel_progress: float = 0.01, - ls_init_learn_rate: float = 0.1, + warm_start: bool = False, + learning_rate: Optional[float] = None, + learning_rate_strategy: Literal["line_search", "constant"] = "line_search", + tol: float = 0.01, + ls_init_learning_rate: Optional[float] = None, calculate_p_values: bool = False, enable_global_explain: bool = False, ): self.optimize_strategy = optimize_strategy self.fit_intercept = fit_intercept + self.l1_reg = l1_reg self.l2_reg = l2_reg self.max_iterations = max_iterations - self.learn_rate_strategy = learn_rate_strategy - self.early_stop = early_stop - self.min_rel_progress = min_rel_progress - self.ls_init_learn_rate = ls_init_learn_rate + self.warm_start = warm_start + self.learning_rate = learning_rate + self.learning_rate_strategy = learning_rate_strategy + self.tol = tol + self.ls_init_learning_rate = 
ls_init_learning_rate self.calculate_p_values = calculate_p_values self.enable_global_explain = enable_global_explain self._bqml_model: Optional[core.BqmlModel] = None @@ -99,30 +102,43 @@ def _from_bq( for bf_param, bf_value in dummy_linear.__dict__.items(): bqml_param = _BQML_PARAMS_MAPPING.get(bf_param) if bqml_param in last_fitting: - kwargs[bf_param] = type(bf_value)(last_fitting[bqml_param]) + # Convert types + kwargs[bf_param] = ( + float(last_fitting[bqml_param]) + if bf_param in ["l1_reg", "learning_rate", "ls_init_learning_rate"] + else type(bf_value)(last_fitting[bqml_param]) + ) new_linear_regression = cls(**kwargs) new_linear_regression._bqml_model = core.BqmlModel(session, model) return new_linear_regression @property - def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: + def _bqml_options(self) -> dict: """The model options as they will be set for BQML""" - # TODO: Support l1_reg, warm_start, and learn_rate with error catching. - return { + options = { "model_type": "LINEAR_REG", "data_split_method": "NO_SPLIT", "optimize_strategy": self.optimize_strategy, "fit_intercept": self.fit_intercept, "l2_reg": self.l2_reg, "max_iterations": self.max_iterations, - "learn_rate_strategy": self.learn_rate_strategy, - "early_stop": self.early_stop, - "min_rel_progress": self.min_rel_progress, - "ls_init_learn_rate": self.ls_init_learn_rate, + "learn_rate_strategy": self.learning_rate_strategy, + "min_rel_progress": self.tol, "calculate_p_values": self.calculate_p_values, "enable_global_explain": self.enable_global_explain, } + if self.l1_reg is not None: + options["l1_reg"] = self.l1_reg + if self.learning_rate is not None: + options["learn_rate"] = self.learning_rate + if self.ls_init_learning_rate is not None: + options["ls_init_learn_rate"] = self.ls_init_learning_rate + # Even presenting warm_start returns error for NORMAL_EQUATION optimizer + if self.warm_start: + options["warm_start"] = self.warm_start + + return options def _fit( 
self, @@ -188,16 +204,40 @@ class LogisticRegression( bigframes_vendored.sklearn.linear_model._logistic.LogisticRegression.__doc__ ) - # TODO(ashleyxu) support class_weights in the constructor. + # TODO(ashleyxu) support class_weight in the constructor. def __init__( self, *, + optimize_strategy: Literal[ + "auto_strategy", "batch_gradient_descent", "normal_equation" + ] = "auto_strategy", fit_intercept: bool = True, - class_weights: Optional[Union[Literal["balanced"], Dict[str, float]]] = None, + l1_reg: Optional[float] = None, + l2_reg: float = 0.0, + max_iterations: int = 20, + warm_start: bool = False, + learning_rate: Optional[float] = None, + learning_rate_strategy: Literal["line_search", "constant"] = "line_search", + tol: float = 0.01, + ls_init_learning_rate: Optional[float] = None, + calculate_p_values: bool = False, + enable_global_explain: bool = False, + class_weight: Optional[Union[Literal["balanced"], Dict[str, float]]] = None, ): + self.optimize_strategy = optimize_strategy self.fit_intercept = fit_intercept - self.class_weights = class_weights - self._auto_class_weight = class_weights == "balanced" + self.l1_reg = l1_reg + self.l2_reg = l2_reg + self.max_iterations = max_iterations + self.warm_start = warm_start + self.learning_rate = learning_rate + self.learning_rate_strategy = learning_rate_strategy + self.tol = tol + self.ls_init_learning_rate = ls_init_learning_rate + self.calculate_p_values = calculate_p_values + self.enable_global_explain = enable_global_explain + self.class_weight = class_weight + self._auto_class_weight = class_weight == "balanced" self._bqml_model: Optional[core.BqmlModel] = None self._bqml_model_factory = globals.bqml_model_factory() @@ -211,29 +251,55 @@ def _from_bq( # See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun last_fitting = model.training_runs[-1]["trainingOptions"] - if "fitIntercept" in last_fitting: - kwargs["fit_intercept"] = last_fitting["fitIntercept"] 
+ dummy_logistic = cls() + for bf_param, bf_value in dummy_logistic.__dict__.items(): + bqml_param = _BQML_PARAMS_MAPPING.get(bf_param) + if bqml_param in last_fitting: + # Convert types + kwargs[bf_param] = ( + float(last_fitting[bqml_param]) + if bf_param in ["l1_reg", "learning_rate", "ls_init_learning_rate"] + else type(bf_value)(last_fitting[bqml_param]) + ) if last_fitting["autoClassWeights"]: - kwargs["class_weights"] = "balanced" - # TODO(ashleyxu) support class_weights in the constructor. + kwargs["class_weight"] = "balanced" + # TODO(ashleyxu) support class_weight in the constructor. # if "labelClassWeights" in last_fitting: - # kwargs["class_weights"] = last_fitting["labelClassWeights"] + # kwargs["class_weight"] = last_fitting["labelClassWeights"] new_logistic_regression = cls(**kwargs) new_logistic_regression._bqml_model = core.BqmlModel(session, model) return new_logistic_regression @property - def _bqml_options(self) -> Dict[str, str | int | float | List[str]]: + def _bqml_options(self) -> dict: """The model options as they will be set for BQML""" - return { + options = { "model_type": "LOGISTIC_REG", "data_split_method": "NO_SPLIT", "fit_intercept": self.fit_intercept, "auto_class_weights": self._auto_class_weight, - # TODO(ashleyxu): support class_weights (struct array as dict in our API) - # "class_weights": self.class_weights, + "optimize_strategy": self.optimize_strategy, + "l2_reg": self.l2_reg, + "max_iterations": self.max_iterations, + "learn_rate_strategy": self.learning_rate_strategy, + "min_rel_progress": self.tol, + "calculate_p_values": self.calculate_p_values, + "enable_global_explain": self.enable_global_explain, + # TODO(ashleyxu): support class_weight (struct array as dict in our API) + # "class_weight": self.class_weight, } + if self.l1_reg is not None: + options["l1_reg"] = self.l1_reg + if self.learning_rate is not None: + options["learn_rate"] = self.learning_rate + if self.ls_init_learning_rate is not None: + 
options["ls_init_learn_rate"] = self.ls_init_learning_rate + # Even presenting warm_start returns error for NORMAL_EQUATION optimizer + if self.warm_start: + options["warm_start"] = self.warm_start + + return options def _fit( self, @@ -290,10 +356,10 @@ def to_gbq(self, model_name: str, replace: bool = False) -> LogisticRegression: if not self._bqml_model: raise RuntimeError("A model must be fitted before it can be saved") - # TODO(ashleyxu): support class_weights (struct array as dict in our API) - if self.class_weights not in (None, "balanced"): + # TODO(ashleyxu): support class_weight (struct array as dict in our API) + if self.class_weight not in (None, "balanced"): raise NotImplementedError( - f"class_weights is not supported yet. {constants.FEEDBACK_LINK}" + f"class_weight is not supported yet. {constants.FEEDBACK_LINK}" ) new_model = self._bqml_model.copy(model_name, replace) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 10c3cc51b2..6c4ae2ea43 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -73,12 +73,10 @@ def __init__( ): self.model_name = model_name self.session = session or bpd.get_global_session() - self._bq_connection_manager = clients.BqConnectionManager( - self.session.bqconnectionclient, self.session.resourcemanagerclient - ) + self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection - self.connection_name = self._bq_connection_manager.resolve_full_connection_name( + self.connection_name = clients.resolve_full_bq_connection_name( connection_name, default_project=self.session._project, default_location=self.session._location, @@ -93,17 +91,19 @@ def _create_bqml_model(self): raise ValueError( "Must provide connection_name, either in constructor or through session options." 
) - connection_name_parts = self.connection_name.split(".") - if len(connection_name_parts) != 3: - raise ValueError( - f"connection_name must be of the format .., got {self.connection_name}." + + if self._bq_connection_manager: + connection_name_parts = self.connection_name.split(".") + if len(connection_name_parts) != 3: + raise ValueError( + f"connection_name must be of the format .., got {self.connection_name}." + ) + self._bq_connection_manager.create_bq_connection( + project_id=connection_name_parts[0], + location=connection_name_parts[1], + connection_id=connection_name_parts[2], + iam_role="aiplatform.user", ) - self._bq_connection_manager.create_bq_connection( - project_id=connection_name_parts[0], - location=connection_name_parts[1], - connection_id=connection_name_parts[2], - iam_role="aiplatform.user", - ) if self.model_name not in _TEXT_GENERATOR_ENDPOINTS: raise ValueError( @@ -289,12 +289,10 @@ def __init__( self.model_name = model_name self.version = version self.session = session or bpd.get_global_session() - self._bq_connection_manager = clients.BqConnectionManager( - self.session.bqconnectionclient, self.session.resourcemanagerclient - ) + self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection - self.connection_name = self._bq_connection_manager.resolve_full_connection_name( + self.connection_name = clients.resolve_full_bq_connection_name( connection_name, default_project=self.session._project, default_location=self.session._location, @@ -309,17 +307,19 @@ def _create_bqml_model(self): raise ValueError( "Must provide connection_name, either in constructor or through session options." ) - connection_name_parts = self.connection_name.split(".") - if len(connection_name_parts) != 3: - raise ValueError( - f"connection_name must be of the format .., got {self.connection_name}." 
+ + if self._bq_connection_manager: + connection_name_parts = self.connection_name.split(".") + if len(connection_name_parts) != 3: + raise ValueError( + f"connection_name must be of the format .., got {self.connection_name}." + ) + self._bq_connection_manager.create_bq_connection( + project_id=connection_name_parts[0], + location=connection_name_parts[1], + connection_id=connection_name_parts[2], + iam_role="aiplatform.user", ) - self._bq_connection_manager.create_bq_connection( - project_id=connection_name_parts[0], - location=connection_name_parts[1], - connection_id=connection_name_parts[2], - iam_role="aiplatform.user", - ) if self.model_name not in _EMBEDDING_GENERATOR_ENDPOINTS: raise ValueError( @@ -437,12 +437,10 @@ def __init__( connection_name: Optional[str] = None, ): self.session = session or bpd.get_global_session() - self._bq_connection_manager = clients.BqConnectionManager( - self.session.bqconnectionclient, self.session.resourcemanagerclient - ) + self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection - self.connection_name = self._bq_connection_manager.resolve_full_connection_name( + self.connection_name = clients.resolve_full_bq_connection_name( connection_name, default_project=self.session._project, default_location=self.session._location, @@ -457,17 +455,19 @@ def _create_bqml_model(self): raise ValueError( "Must provide connection_name, either in constructor or through session options." ) - connection_name_parts = self.connection_name.split(".") - if len(connection_name_parts) != 3: - raise ValueError( - f"connection_name must be of the format .., got {self.connection_name}." + + if self._bq_connection_manager: + connection_name_parts = self.connection_name.split(".") + if len(connection_name_parts) != 3: + raise ValueError( + f"connection_name must be of the format .., got {self.connection_name}." 
+ ) + self._bq_connection_manager.create_bq_connection( + project_id=connection_name_parts[0], + location=connection_name_parts[1], + connection_id=connection_name_parts[2], + iam_role="aiplatform.user", ) - self._bq_connection_manager.create_bq_connection( - project_id=connection_name_parts[0], - location=connection_name_parts[1], - connection_id=connection_name_parts[2], - iam_role="aiplatform.user", - ) options = {"endpoint": _GEMINI_PRO_ENDPOINT} diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py index 2b83382e68..8cf892f536 100644 --- a/bigframes/ml/remote.py +++ b/bigframes/ml/remote.py @@ -62,11 +62,9 @@ def __init__( self.output = output self.session = session or bpd.get_global_session() - self._bq_connection_manager = clients.BqConnectionManager( - self.session.bqconnectionclient, self.session.resourcemanagerclient - ) + self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection - self.connection_name = self._bq_connection_manager.resolve_full_connection_name( + self.connection_name = clients.resolve_full_bq_connection_name( connection_name, default_project=self.session._project, default_location=self.session._location, @@ -81,17 +79,19 @@ def _create_bqml_model(self): raise ValueError( "Must provide connection_name, either in constructor or through session options." ) - connection_name_parts = self.connection_name.split(".") - if len(connection_name_parts) != 3: - raise ValueError( - f"connection_name must be of the format .., got {self.connection_name}." + + if self._bq_connection_manager: + connection_name_parts = self.connection_name.split(".") + if len(connection_name_parts) != 3: + raise ValueError( + f"connection_name must be of the format .., got {self.connection_name}." 
+ ) + self._bq_connection_manager.create_bq_connection( + project_id=connection_name_parts[0], + location=connection_name_parts[1], + connection_id=connection_name_parts[2], + iam_role="aiplatform.user", ) - self._bq_connection_manager.create_bq_connection( - project_id=connection_name_parts[0], - location=connection_name_parts[1], - connection_id=connection_name_parts[2], - iam_role="aiplatform.user", - ) options = { "endpoint": self.endpoint, diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index fa74458e77..807fadc06a 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -38,7 +38,9 @@ def encode_value(self, v: Union[str, int, float, Iterable[str]]) -> str: inner = ", ".join([self.encode_value(x) for x in v]) return f"[{inner}]" else: - raise ValueError(f"Unexpected value type. {constants.FEEDBACK_LINK}") + raise ValueError( + f"Unexpected value type {type(v)}. {constants.FEEDBACK_LINK}" + ) def build_parameters(self, **kwargs: Union[str, int, float, Iterable[str]]) -> str: """Encode a dict of values into a formatted Iterable of key-value pairs for SQL""" diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index b122f1fe7c..4ecb8dca5a 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -18,6 +18,8 @@ import typing import numpy as np +import pandas as pd +import pyarrow as pa import bigframes.dtypes as dtypes import bigframes.operations.type as op_typing @@ -40,6 +42,11 @@ def arguments(self) -> int: def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: ... + @property + def order_preserving(self) -> bool: + """Whether the row operation preserves total ordering. Can be pruned from ordering expressions.""" + ... 
+ # These classes can be used to create simple ops that don't take local parameters # All is needed is a unique name, and to register an implementation in ibis_mappings.py @@ -65,6 +72,11 @@ def as_expr( self, (_convert_expr_input(input_id),) ) + @property + def order_preserving(self) -> bool: + """Whether the row operation preserves total ordering. Can be pruned from ordering expressions.""" + return False + @dataclasses.dataclass(frozen=True) class BinaryOp: @@ -94,6 +106,11 @@ def as_expr( ), ) + @property + def order_preserving(self) -> bool: + """Whether the row operation preserves total ordering. Can be pruned from ordering expressions.""" + return False + @dataclasses.dataclass(frozen=True) class TernaryOp: @@ -125,6 +142,11 @@ def as_expr( ), ) + @property + def order_preserving(self) -> bool: + """Whether the row operation preserves total ordering. Can be pruned from ordering expressions.""" + return False + def _convert_expr_input( input: typing.Union[str, bigframes.core.expression.Expression] @@ -198,14 +220,19 @@ def create_ternary_op( ## DateTime Ops day_op = create_unary_op(name="day", type_rule=op_typing.INTEGER) dayofweek_op = create_unary_op(name="dayofweek", type_rule=op_typing.INTEGER) -date_op = create_unary_op(name="date") +date_op = create_unary_op( + name="date", type_rule=op_typing.Fixed(pd.ArrowDtype(pa.date32())) +) hour_op = create_unary_op(name="hour", type_rule=op_typing.INTEGER) minute_op = create_unary_op(name="minute", type_rule=op_typing.INTEGER) month_op = create_unary_op(name="month", type_rule=op_typing.INTEGER) quarter_op = create_unary_op(name="quarter", type_rule=op_typing.INTEGER) second_op = create_unary_op(name="second", type_rule=op_typing.INTEGER) -time_op = create_unary_op(name="time", type_rule=op_typing.INTEGER) +time_op = create_unary_op( + name="time", type_rule=op_typing.Fixed(pd.ArrowDtype(pa.time64("us"))) +) year_op = create_unary_op(name="year", type_rule=op_typing.INTEGER) +normalize_op = 
create_unary_op(name="normalize") ## Trigonometry Ops sin_op = create_unary_op(name="sin", type_rule=op_typing.REAL_NUMERIC) cos_op = create_unary_op(name="cos", type_rule=op_typing.REAL_NUMERIC) @@ -321,7 +348,7 @@ class StrFindOp(UnaryOp): end: typing.Optional[int] def output_type(self, *input_types): - return dtypes.BOOL_DTYPE + return dtypes.INT_DTYPE @dataclasses.dataclass(frozen=True) @@ -359,6 +386,14 @@ class StructFieldOp(UnaryOp): name: typing.ClassVar[str] = "struct_field" name_or_index: str | int + def output_type(self, *input_types): + pd_type = typing.cast(pd.ArrowDtype, input_types[0]) + pa_struct_t = typing.cast(pa.StructType, pd_type.pyarrow_dtype) + pa_result_type = pa_struct_t[self.name_or_index].type + # TODO: Directly convert from arrow to pandas type + ibis_result_type = dtypes.arrow_dtype_to_ibis_dtype(pa_result_type) + return dtypes.ibis_dtype_to_bigframes_dtype(ibis_result_type) + @dataclasses.dataclass(frozen=True) class AsTypeOp(UnaryOp): @@ -367,6 +402,9 @@ class AsTypeOp(UnaryOp): to_type: dtypes.DtypeString | dtypes.Dtype def output_type(self, *input_types): + # TODO: We should do this conversion earlier + if self.to_type == pa.string(): + return dtypes.STRING_DTYPE if isinstance(self.to_type, str): return dtypes.BIGFRAMES_STRING_TO_BIGFRAMES[self.to_type] return self.to_type @@ -389,10 +427,8 @@ class RemoteFunctionOp(UnaryOp): apply_on_null: bool def output_type(self, *input_types): - python_type = self.func.__signature__.output_type - ibis_type = dtypes.ibis_type_from_python_type(python_type) - dtype = dtypes.ibis_dtype_to_bigframes_dtype(ibis_type) - return dtype + # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method + return self.func.output_dtype @dataclasses.dataclass(frozen=True) @@ -412,7 +448,8 @@ class ToDatetimeOp(UnaryOp): unit: typing.Optional[str] = None def output_type(self, *input_types): - return input_types[0] + timezone = "UTC" if self.utc else None + return 
pd.ArrowDtype(pa.timestamp("us", tz=timezone)) @dataclasses.dataclass(frozen=True) @@ -424,6 +461,15 @@ def output_type(self, *input_types): return dtypes.STRING_DTYPE +@dataclasses.dataclass(frozen=True) +class FloorDtOp(UnaryOp): + name: typing.ClassVar[str] = "floor_dt" + freq: str + + def output_type(self, *input_types): + return input_types[0] + + # Binary Ops fillna_op = create_binary_op(name="fillna") cliplower_op = create_binary_op(name="clip_lower") @@ -434,14 +480,14 @@ def output_type(self, *input_types): sub_op = create_binary_op(name="sub", type_rule=op_typing.NUMERIC) mul_op = create_binary_op(name="mul", type_rule=op_typing.NUMERIC) div_op = create_binary_op(name="div", type_rule=op_typing.REAL_NUMERIC) -floordiv_op = create_binary_op(name="floordiv", type_rule=op_typing.REAL_NUMERIC) -pow_op = create_binary_op(name="pow", type_rule=op_typing.REAL_NUMERIC) +floordiv_op = create_binary_op(name="floordiv", type_rule=op_typing.NUMERIC) +pow_op = create_binary_op(name="pow", type_rule=op_typing.NUMERIC) mod_op = create_binary_op(name="mod", type_rule=op_typing.NUMERIC) round_op = create_binary_op(name="round", type_rule=op_typing.REAL_NUMERIC) unsafe_pow_op = create_binary_op(name="unsafe_pow_op", type_rule=op_typing.REAL_NUMERIC) # Logical Ops -and_op = create_binary_op(name="and", type_rule=op_typing.PREDICATE) -or_op = create_binary_op(name="or", type_rule=op_typing.PREDICATE) +and_op = create_binary_op(name="and") +or_op = create_binary_op(name="or") ## Comparison Ops eq_op = create_binary_op(name="eq", type_rule=op_typing.PREDICATE) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index 5c9d771f61..ad5abb4bca 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -14,8 +14,15 @@ import abc import typing +import uuid -import matplotlib.pyplot as plt +import pandas as pd + +import bigframes.constants as constants +import bigframes.dtypes as dtypes + 
+DEFAULT_SAMPLING_N = 1000 +DEFAULT_SAMPLING_STATE = 0 class MPLPlot(abc.ABC): @@ -24,6 +31,11 @@ def generate(self): pass def draw(self) -> None: + # This import can fail with "Matplotlib failed to acquire the + # following lock file" so import here to reduce the chance of + # our parallel test suite from triggering this. + import matplotlib.pyplot as plt + plt.draw_if_interactive() @property @@ -38,20 +50,26 @@ def _kind(self): def __init__(self, data, **kwargs) -> None: self.kwargs = kwargs - self.data = self._compute_plot_data(data) + self.data = data def generate(self) -> None: - self.axes = self.data.plot(kind=self._kind, **self.kwargs) + plot_data = self._compute_plot_data() + self.axes = plot_data.plot(kind=self._kind, **self.kwargs) - def _compute_plot_data(self, data): + def _compute_sample_data(self, data): # TODO: Cache the sampling data in the PlotAccessor. - sampling_n = self.kwargs.pop("sampling_n", 100) - sampling_random_state = self.kwargs.pop("sampling_random_state", 0) - return ( - data.sample(n=sampling_n, random_state=sampling_random_state) - .to_pandas() - .sort_index() + sampling_n = self.kwargs.pop("sampling_n", DEFAULT_SAMPLING_N) + sampling_random_state = self.kwargs.pop( + "sampling_random_state", DEFAULT_SAMPLING_STATE ) + return data.sample( + n=sampling_n, + random_state=sampling_random_state, + sort=False, + ).to_pandas() + + def _compute_plot_data(self): + return self._compute_sample_data(self.data) class LinePlot(SamplingPlot): @@ -70,3 +88,45 @@ class ScatterPlot(SamplingPlot): @property def _kind(self) -> typing.Literal["scatter"]: return "scatter" + + def __init__(self, data, **kwargs) -> None: + super().__init__(data, **kwargs) + + c = self.kwargs.get("c", None) + if self._is_sequence_arg(c): + raise NotImplementedError( + f"Only support a single color string or a column name/position. 
{constants.FEEDBACK_LINK}" + ) + + def _compute_plot_data(self): + sample = self._compute_sample_data(self.data) + + # Works around a pandas bug: + # https://github.com/pandas-dev/pandas/commit/45b937d64f6b7b6971856a47e379c7c87af7e00a + c = self.kwargs.get("c", None) + if pd.core.dtypes.common.is_integer(c): + c = self.data.columns[c] + if self._is_column_name(c, sample) and sample[c].dtype == dtypes.STRING_DTYPE: + sample[c] = sample[c].astype("object") + + return sample + + def _is_sequence_arg(self, arg): + return ( + arg is not None + and not isinstance(arg, str) + and isinstance(arg, typing.Iterable) + ) + + def _is_column_name(self, arg, data): + return ( + arg is not None + and pd.core.dtypes.common.is_hashable(arg) + and arg in data.columns + ) + + def _generate_new_column_name(self, data): + col_name = None + while col_name is None or col_name in data.columns: + col_name = f"plot_temp_{str(uuid.uuid4())[:8]}" + return col_name diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index 6301ece865..9a270f1ce7 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -19,6 +19,11 @@ import typing from typing import ClassVar, Hashable, Optional, Tuple +import pandas as pd +import pyarrow as pa + +import bigframes.dtypes as dtypes + @dataclasses.dataclass(frozen=True) class WindowOp: @@ -32,6 +37,10 @@ def handles_ties(self): """Whether the operator can handle ties without nondeterministic output. (eg. rank operator can handle ties but not the count operator)""" return False + @abc.abstractmethod + def output_type(self, *input_types: dtypes.ExpressionType): + ... 
+ @dataclasses.dataclass(frozen=True) class UnaryWindowOp(WindowOp): @@ -39,6 +48,9 @@ class UnaryWindowOp(WindowOp): def arguments(self) -> int: return 1 + def output_type(self, *input_types: dtypes.ExpressionType): + return input_types[0] + @dataclasses.dataclass(frozen=True) class AggregateOp(WindowOp): @@ -73,11 +85,24 @@ def arguments(self) -> int: class SumOp(UnaryAggregateOp): name: ClassVar[str] = "sum" + def output_type(self, *input_types: dtypes.ExpressionType): + if pd.api.types.is_bool_dtype(input_types[0]): + return dtypes.INT_DTYPE + else: + return input_types[0] + @dataclasses.dataclass(frozen=True) class MedianOp(UnaryAggregateOp): name: ClassVar[str] = "median" + def output_type(self, *input_types: dtypes.ExpressionType): + # These will change if median is changed to exact implementation. + if pd.api.types.is_bool_dtype(input_types[0]): + return dtypes.INT_DTYPE + else: + return input_types[0] + @dataclasses.dataclass(frozen=True) class ApproxQuartilesOp(UnaryAggregateOp): @@ -87,16 +112,38 @@ class ApproxQuartilesOp(UnaryAggregateOp): def name(self): return f"{self.quartile*25}%" + def output_type(self, *input_types: dtypes.ExpressionType): + if pd.api.types.is_bool_dtype(input_types[0]) or pd.api.types.is_integer_dtype( + input_types[0] + ): + return dtypes.FLOAT_DTYPE + else: + return input_types[0] + @dataclasses.dataclass(frozen=True) class MeanOp(UnaryAggregateOp): name: ClassVar[str] = "mean" + def output_type(self, *input_types: dtypes.ExpressionType): + if pd.api.types.is_bool_dtype(input_types[0]) or pd.api.types.is_integer_dtype( + input_types[0] + ): + return dtypes.FLOAT_DTYPE + else: + return input_types[0] + @dataclasses.dataclass(frozen=True) class ProductOp(UnaryAggregateOp): name: ClassVar[str] = "product" + def output_type(self, *input_types: dtypes.ExpressionType): + if pd.api.types.is_bool_dtype(input_types[0]): + return dtypes.INT_DTYPE + else: + return input_types[0] + @dataclasses.dataclass(frozen=True) class 
MaxOp(UnaryAggregateOp): @@ -112,16 +159,25 @@ class MinOp(UnaryAggregateOp): class StdOp(UnaryAggregateOp): name: ClassVar[str] = "std" + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.FLOAT_DTYPE + @dataclasses.dataclass(frozen=True) class VarOp(UnaryAggregateOp): name: ClassVar[str] = "var" + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.FLOAT_DTYPE + @dataclasses.dataclass(frozen=True) class PopVarOp(UnaryAggregateOp): name: ClassVar[str] = "popvar" + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.FLOAT_DTYPE + @dataclasses.dataclass(frozen=True) class CountOp(UnaryAggregateOp): @@ -131,6 +187,9 @@ class CountOp(UnaryAggregateOp): def skips_nulls(self): return False + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.INT_DTYPE + @dataclasses.dataclass(frozen=True) class CutOp(UnaryWindowOp): @@ -146,6 +205,24 @@ def skips_nulls(self): def handles_ties(self): return True + def output_type(self, *input_types: dtypes.ExpressionType): + if isinstance(self.bins, int) and (self.labels is False): + return dtypes.INT_DTYPE + else: + # Assumption: buckets use same numeric type + interval_dtype = ( + pa.float64() + if isinstance(self.bins, int) + else dtypes.infer_literal_arrow_type(self.bins[0][0]) + ) + pa_type = pa.struct( + [ + ("left_exclusive", interval_dtype), + ("right_inclusive", interval_dtype), + ] + ) + return pd.ArrowDtype(pa_type) + @dataclasses.dataclass(frozen=True) class QcutOp(UnaryWindowOp): @@ -163,6 +240,9 @@ def skips_nulls(self): def handles_ties(self): return True + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.INT_DTYPE + @dataclasses.dataclass(frozen=True) class NuniqueOp(UnaryAggregateOp): @@ -172,6 +252,9 @@ class NuniqueOp(UnaryAggregateOp): def skips_nulls(self): return False + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.INT_DTYPE + @dataclasses.dataclass(frozen=True) 
class AnyValueOp(UnaryAggregateOp): @@ -196,6 +279,9 @@ def skips_nulls(self): def handles_ties(self): return True + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.INT_DTYPE + @dataclasses.dataclass(frozen=True) class DenseRankOp(UnaryWindowOp): @@ -207,6 +293,9 @@ def skips_nulls(self): def handles_ties(self): return True + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.INT_DTYPE + @dataclasses.dataclass(frozen=True) class FirstOp(UnaryWindowOp): @@ -254,21 +343,33 @@ def skips_nulls(self): class AllOp(UnaryAggregateOp): name: ClassVar[str] = "all" + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.BOOL_DTYPE + @dataclasses.dataclass(frozen=True) class AnyOp(UnaryAggregateOp): name: ClassVar[str] = "any" + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.BOOL_DTYPE + @dataclasses.dataclass(frozen=True) class CorrOp(BinaryAggregateOp): name: ClassVar[str] = "corr" + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.FLOAT_DTYPE + @dataclasses.dataclass(frozen=True) class CovOp(BinaryAggregateOp): name: ClassVar[str] = "cov" + def output_type(self, *input_types: dtypes.ExpressionType): + return dtypes.FLOAT_DTYPE + sum_op = SumOp() mean_op = MeanOp() diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index eb91bc0b20..7d25ac3622 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -94,3 +94,9 @@ def unit(self) -> str: def strftime(self, date_format: str) -> series.Series: return self._apply_unary_op(ops.StrftimeOp(date_format=date_format)) + + def normalize(self) -> series.Series: + return self._apply_unary_op(ops.normalize_op) + + def floor(self, freq: str) -> series.Series: + return self._apply_unary_op(ops.FloorDtOp(freq=freq)) diff --git a/bigframes/operations/type.py b/bigframes/operations/type.py index 3c16f0cbe9..30e0c1e745 100644 --- 
a/bigframes/operations/type.py +++ b/bigframes/operations/type.py @@ -15,8 +15,6 @@ import dataclasses import functools -import pandas as pd - import bigframes.dtypes from bigframes.dtypes import ExpressionType @@ -46,13 +44,10 @@ def output_type(self, *input_types: ExpressionType) -> ExpressionType: @dataclasses.dataclass class RealNumeric(OpTypeRule): def output_type(self, *input_types: ExpressionType) -> ExpressionType: - all_ints = all(pd.api.types.is_integer(input) for input in input_types) - if all_ints: - return bigframes.dtypes.FLOAT_DTYPE - else: - return functools.reduce( - lambda t1, t2: bigframes.dtypes.lcd_etype(t1, t2), input_types - ) + return functools.reduce( + lambda t1, t2: bigframes.dtypes.lcd_etype(t1, t2), + [*input_types, bigframes.dtypes.FLOAT_DTYPE], + ) @dataclasses.dataclass diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 10caf17b79..b6476c5eb8 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -19,6 +19,7 @@ from collections import namedtuple from datetime import datetime import inspect +import resource import sys import typing from typing import ( @@ -491,9 +492,10 @@ def read_gbq( *, index_col: Iterable[str] | str = (), columns: Iterable[str] = (), + configuration: Optional[Dict] = None, max_results: Optional[int] = None, filters: vendored_pandas_gbq.FiltersType = (), - use_cache: bool = True, + use_cache: Optional[bool] = None, col_order: Iterable[str] = (), ) -> bigframes.dataframe.DataFrame: _set_default_session_location_if_possible(query_or_table) @@ -502,6 +504,7 @@ def read_gbq( query_or_table, index_col=index_col, columns=columns, + configuration=configuration, max_results=max_results, filters=filters, use_cache=use_cache, @@ -527,8 +530,9 @@ def read_gbq_query( *, index_col: Iterable[str] | str = (), columns: Iterable[str] = (), + configuration: Optional[Dict] = None, max_results: Optional[int] = None, - use_cache: bool = True, + use_cache: Optional[bool] = None, 
col_order: Iterable[str] = (), ) -> bigframes.dataframe.DataFrame: _set_default_session_location_if_possible(query) @@ -537,6 +541,7 @@ def read_gbq_query( query, index_col=index_col, columns=columns, + configuration=configuration, max_results=max_results, use_cache=use_cache, col_order=col_order, @@ -705,7 +710,13 @@ def to_datetime( # SQL Compilation uses recursive algorithms on deep trees # 10M tree depth should be sufficient to generate any sql that is under bigquery limit +# Note: This limit does not have the desired effect on Python 3.12 in +# which the applicable limit is now hard coded. See: +# https://github.com/python/cpython/issues/112282 sys.setrecursionlimit(max(10000000, sys.getrecursionlimit())) +resource.setrlimit( + resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY) +) # Use __all__ to let type checkers know what is part of the public API. __all___ = [ diff --git a/bigframes/series.py b/bigframes/series.py index 6128238057..e7b358c2fe 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -19,9 +19,10 @@ import functools import itertools import numbers +import os import textwrap import typing -from typing import Any, Mapping, Optional, Tuple, Union +from typing import Any, Literal, Mapping, Optional, Tuple, Union import bigframes_vendored.pandas.core.series as vendored_pandas_series import google.cloud.bigquery as bigquery @@ -39,7 +40,7 @@ import bigframes.core.groupby as groupby import bigframes.core.indexers import bigframes.core.indexes as indexes -from bigframes.core.ordering import OrderingColumnReference, OrderingDirection +import bigframes.core.ordering as order import bigframes.core.scalar as scalars import bigframes.core.utils as utils import bigframes.core.window @@ -71,6 +72,11 @@ def __init__(self, *args, **kwargs): self._query_job: Optional[bigquery.QueryJob] = None super().__init__(*args, **kwargs) + # Runs strict validations to ensure internal type predictions and ibis are completely 
in sync + # Do not execute these validations outside of testing suite. + if "PYTEST_CURRENT_TEST" in os.environ: + self._block.expr.validate_schema() + @property def dt(self) -> dt.DatetimeMethods: return dt.DatetimeMethods(self._block) @@ -103,6 +109,11 @@ def at(self) -> bigframes.core.indexers.AtSeriesIndexer: def name(self) -> blocks.Label: return self._name + @name.setter + def name(self, label: blocks.Label): + new_block = self._block.with_column_labels([label]) + self._set_block(new_block) + @property def shape(self) -> typing.Tuple[int]: return (self._block.shape[0],) @@ -869,7 +880,7 @@ def mode(self) -> Series: # use temporary name for reset_index to avoid collision, restore after dropping extra columns block = ( block.with_index_labels(["mode_temp_internal"]) - .order_by([OrderingColumnReference(self._value_column)]) + .order_by([order.ascending_over(self._value_column)]) .reset_index(drop=False) ) block = block.select_column(self._value_column).with_column_labels([self.name]) @@ -935,10 +946,8 @@ def argmax(self) -> int: block, row_nums = self._block.promote_offsets() block = block.order_by( [ - OrderingColumnReference( - self._value_column, direction=OrderingDirection.DESC - ), - OrderingColumnReference(row_nums), + order.descending_over(self._value_column), + order.ascending_over(row_nums), ] ) return typing.cast( @@ -949,8 +958,8 @@ def argmin(self) -> int: block, row_nums = self._block.promote_offsets() block = block.order_by( [ - OrderingColumnReference(self._value_column), - OrderingColumnReference(row_nums), + order.ascending_over(self._value_column), + order.ascending_over(row_nums), ] ) return typing.cast( @@ -983,11 +992,9 @@ def unstack(self, level: LevelsType = -1): def idxmax(self) -> blocks.Label: block = self._block.order_by( [ - OrderingColumnReference( - self._value_column, direction=OrderingDirection.DESC - ), + order.descending_over(self._value_column), *[ - OrderingColumnReference(idx_col) + order.ascending_over(idx_col) for idx_col 
in self._block.index_columns ], ] @@ -998,9 +1005,9 @@ def idxmax(self) -> blocks.Label: def idxmin(self) -> blocks.Label: block = self._block.order_by( [ - OrderingColumnReference(self._value_column), + order.ascending_over(self._value_column), *[ - OrderingColumnReference(idx_col) + order.ascending_over(idx_col) for idx_col in self._block.index_columns ], ] @@ -1093,14 +1100,11 @@ def sort_values( ) -> Series: if na_position not in ["first", "last"]: raise ValueError("Param na_position must be one of 'first' or 'last'") - direction = OrderingDirection.ASC if ascending else OrderingDirection.DESC block = self._block.order_by( [ - OrderingColumnReference( - self._value_column, - direction=direction, - na_last=(na_position == "last"), - ) + order.ascending_over(self._value_column, (na_position == "last")) + if ascending + else order.descending_over(self._value_column, (na_position == "last")) ], ) return Series(block) @@ -1110,10 +1114,11 @@ def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series: if na_position not in ["first", "last"]: raise ValueError("Param na_position must be one of 'first' or 'last'") block = self._block - direction = OrderingDirection.ASC if ascending else OrderingDirection.DESC na_last = na_position == "last" ordering = [ - OrderingColumnReference(column, direction=direction, na_last=na_last) + order.ascending_over(column, na_last) + if ascending + else order.descending_over(column, na_last) for column in block.index_columns ] block = block.order_by(ordering) @@ -1529,6 +1534,7 @@ def sample( frac: Optional[float] = None, *, random_state: Optional[int] = None, + sort: Optional[bool | Literal["random"]] = "random", ) -> Series: if n is not None and frac is not None: raise ValueError("Only one of 'n' or 'frac' parameter can be specified.") @@ -1536,7 +1542,9 @@ def sample( ns = (n,) if n is not None else () fracs = (frac,) if frac is not None else () return Series( - self._block._split(ns=ns, fracs=fracs, 
random_state=random_state)[0] + self._block._split( + ns=ns, fracs=fracs, random_state=random_state, sort=sort + )[0] ) def __array_ufunc__( diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 4cb3c11859..479b3a7bac 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -16,6 +16,7 @@ from __future__ import annotations +import copy import datetime import itertools import logging @@ -70,18 +71,20 @@ ReadPickleBuffer, StorageOptions, ) +import pyarrow as pa import bigframes._config.bigquery_options as bigquery_options +import bigframes.clients import bigframes.constants as constants import bigframes.core as core import bigframes.core.blocks as blocks import bigframes.core.compile import bigframes.core.guid as guid -from bigframes.core.ordering import IntegerEncoding, OrderingColumnReference -import bigframes.core.ordering as orderings +from bigframes.core.ordering import IntegerEncoding +import bigframes.core.ordering as order import bigframes.core.traversal as traversals import bigframes.core.utils as utils -import bigframes.dataframe as dataframe +import bigframes.dtypes import bigframes.formatting_helpers as formatting_helpers from bigframes.functions.remote_function import read_gbq_function as bigframes_rgf from bigframes.functions.remote_function import remote_function as bigframes_rf @@ -89,6 +92,10 @@ import bigframes.session.clients import bigframes.version +# Avoid circular imports. 
+if typing.TYPE_CHECKING: + import bigframes.dataframe as dataframe + _BIGFRAMES_DEFAULT_CONNECTION_ID = "bigframes-default-connection" _MAX_CLUSTER_COLUMNS = 4 @@ -112,6 +119,20 @@ logger = logging.getLogger(__name__) +# Excludes geography, bytes, and nested (array, struct) datatypes +INLINABLE_DTYPES: Sequence[bigframes.dtypes.Dtype] = ( + pandas.BooleanDtype(), + pandas.Float64Dtype(), + pandas.Int64Dtype(), + pandas.StringDtype(storage="pyarrow"), + pandas.ArrowDtype(pa.date32()), + pandas.ArrowDtype(pa.time64("us")), + pandas.ArrowDtype(pa.timestamp("us")), + pandas.ArrowDtype(pa.timestamp("us", tz="UTC")), + pandas.ArrowDtype(pa.decimal128(38, 9)), + pandas.ArrowDtype(pa.decimal256(76, 38)), +) + def _is_query(query_or_table: str) -> bool: """Determine if `query_or_table` is a table ID or a SQL string""" @@ -137,7 +158,7 @@ class Session( Configuration adjusting how to connect to BigQuery and related APIs. Note that some options are ignored if ``clients_provider`` is set. - clients_provider (bigframes.session.bigframes.session.clients.ClientsProvider): + clients_provider (bigframes.session.clients.ClientsProvider): An object providing client library objects. """ @@ -196,6 +217,7 @@ def __init__( # Resolve the BQ connection for remote function and Vertex AI integration self._bq_connection = context.bq_connection or _BIGFRAMES_DEFAULT_CONNECTION_ID + self._skip_bq_connection_check = context._skip_bq_connection_check # Now that we're starting the session, don't allow the options to be # changed. 
@@ -222,6 +244,16 @@ def cloudfunctionsclient(self): def resourcemanagerclient(self): return self._clients_provider.resourcemanagerclient + _bq_connection_manager: Optional[bigframes.clients.BqConnectionManager] = None + + @property + def bqconnectionmanager(self): + if not self._skip_bq_connection_check and not self._bq_connection_manager: + self._bq_connection_manager = bigframes.clients.BqConnectionManager( + self.bqconnectionclient, self.resourcemanagerclient + ) + return self._bq_connection_manager + @property def _project(self): return self.bqclient.project @@ -255,9 +287,10 @@ def read_gbq( *, index_col: Iterable[str] | str = (), columns: Iterable[str] = (), + configuration: Optional[Dict] = None, max_results: Optional[int] = None, filters: third_party_pandas_gbq.FiltersType = (), - use_cache: bool = True, + use_cache: Optional[bool] = None, col_order: Iterable[str] = (), # Add a verify index argument that fails if the index is not unique. ) -> dataframe.DataFrame: @@ -278,6 +311,7 @@ def read_gbq( query_or_table, index_col=index_col, columns=columns, + configuration=configuration, max_results=max_results, api_name="read_gbq", use_cache=use_cache, @@ -286,13 +320,20 @@ def read_gbq( # TODO(swast): Query the snapshot table but mark it as a # deterministic query so we can avoid serializing if we have a # unique index. + if configuration is not None: + raise ValueError( + "The 'configuration' argument is not allowed when " + "directly reading from a table. Please remove " + "'configuration' or use a query." 
+ ) + return self._read_gbq_table( query_or_table, index_col=index_col, columns=columns, max_results=max_results, api_name="read_gbq", - use_cache=use_cache, + use_cache=use_cache if use_cache is not None else True, ) def _to_query( @@ -377,7 +418,7 @@ def _query_to_destination( query: str, index_cols: List[str], api_name: str, - use_cache: bool = True, + configuration: dict = {"query": {"useQueryCache": True}}, ) -> Tuple[Optional[bigquery.TableReference], Optional[bigquery.QueryJob]]: # If a dry_run indicates this is not a query type job, then don't # bother trying to do a CREATE TEMP TABLE ... AS SELECT ... statement. @@ -399,23 +440,35 @@ def _query_to_destination( ][:_MAX_CLUSTER_COLUMNS] temp_table = self._create_empty_temp_table(schema, cluster_cols) - job_config = bigquery.QueryJobConfig() + timeout_ms = configuration.get("jobTimeoutMs") or configuration["query"].get( + "timeoutMs" + ) + + # Convert timeout_ms to seconds, ensuring a minimum of 0.1 seconds to avoid + # the program getting stuck on too-short timeouts. + timeout = max(int(timeout_ms) * 1e-3, 0.1) if timeout_ms else None + + job_config = typing.cast( + bigquery.QueryJobConfig, + bigquery.QueryJobConfig.from_api_repr(configuration), + ) job_config.labels["bigframes-api"] = api_name job_config.destination = temp_table - job_config.use_query_cache = use_cache try: # Write to temp table to workaround BigQuery 10 GB query results # limit. See: internal issue 303057336. job_config.labels["error_caught"] = "true" - _, query_job = self._start_query(query, job_config=job_config) + _, query_job = self._start_query( + query, job_config=job_config, timeout=timeout + ) return query_job.destination, query_job except google.api_core.exceptions.BadRequest: # Some SELECT statements still aren't compatible with cluster # tables as the destination. For example, if the query has a # top-level ORDER BY, this conflicts with our ability to cluster # the table by the index column(s). 
- _, query_job = self._start_query(query) + _, query_job = self._start_query(query, timeout=timeout) return query_job.destination, query_job def read_gbq_query( @@ -424,8 +477,9 @@ def read_gbq_query( *, index_col: Iterable[str] | str = (), columns: Iterable[str] = (), + configuration: Optional[Dict] = None, max_results: Optional[int] = None, - use_cache: bool = True, + use_cache: Optional[bool] = None, col_order: Iterable[str] = (), ) -> dataframe.DataFrame: """Turn a SQL query into a DataFrame. @@ -489,6 +543,7 @@ def read_gbq_query( query=query, index_col=index_col, columns=columns, + configuration=configuration, max_results=max_results, api_name="read_gbq_query", use_cache=use_cache, @@ -500,10 +555,36 @@ def _read_gbq_query( *, index_col: Iterable[str] | str = (), columns: Iterable[str] = (), + configuration: Optional[Dict] = None, max_results: Optional[int] = None, api_name: str = "read_gbq_query", - use_cache: bool = True, + use_cache: Optional[bool] = None, ) -> dataframe.DataFrame: + import bigframes.dataframe as dataframe + + configuration = _transform_read_gbq_configuration(configuration) + + if "query" not in configuration: + configuration["query"] = {} + + if "query" in configuration["query"]: + raise ValueError( + "The query statement must not be included in the " + "'configuration' because it is already provided as" + " a separate parameter." + ) + + if "useQueryCache" in configuration["query"]: + if use_cache is not None: + raise ValueError( + "'useQueryCache' in 'configuration' conflicts with" + " 'use_cache' parameter. Please specify only one." 
+ ) + else: + configuration["query"]["useQueryCache"] = ( + True if use_cache is None else use_cache + ) + if isinstance(index_col, str): index_cols = [index_col] else: @@ -513,7 +594,7 @@ def _read_gbq_query( query, index_cols, api_name=api_name, - use_cache=use_cache, + configuration=configuration, ) # If there was no destination table, that means the query must have @@ -537,7 +618,7 @@ def _read_gbq_query( index_col=index_cols, columns=columns, max_results=max_results, - use_cache=use_cache, + use_cache=configuration["query"]["useQueryCache"], ) def read_gbq_table( @@ -678,6 +759,8 @@ def _read_gbq_table( api_name: str, use_cache: bool = True, ) -> dataframe.DataFrame: + import bigframes.dataframe as dataframe + if max_results and max_results <= 0: raise ValueError("`max_results` should be a positive number.") @@ -719,10 +802,9 @@ def _read_gbq_table( # Note: currently, a table has a total ordering only when the # primary key(s) are set on a table. The query engine assumes such # columns are unique, even if not enforced. 
- ordering = orderings.ExpressionOrdering( + ordering = order.ExpressionOrdering( ordering_value_columns=tuple( - core.OrderingColumnReference(column_id) - for column_id in total_ordering_cols + order.ascending_over(column_id) for column_id in total_ordering_cols ), total_ordering_columns=frozenset(total_ordering_cols), ) @@ -737,12 +819,9 @@ def _read_gbq_table( elif len(index_cols) != 0: # We have index columns, lets see if those are actually total_order_columns - ordering = orderings.ExpressionOrdering( + ordering = order.ExpressionOrdering( ordering_value_columns=tuple( - [ - core.OrderingColumnReference(column_id) - for column_id in index_cols - ] + [order.ascending_over(column_id) for column_id in index_cols] ), total_ordering_columns=frozenset(index_cols), ) @@ -917,40 +996,59 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame def _read_pandas( self, pandas_dataframe: pandas.DataFrame, api_name: str ) -> dataframe.DataFrame: + import bigframes.dataframe as dataframe + if isinstance(pandas_dataframe, dataframe.DataFrame): raise ValueError( "read_pandas() expects a pandas.DataFrame, but got a " "bigframes.pandas.DataFrame." ) - if ( - pandas_dataframe.size < MAX_INLINE_DF_SIZE - # TODO(swast): Workaround data types limitation in inline data. 
- and not any( - ( - isinstance(s.dtype, pandas.ArrowDtype) - or (len(s) > 0 and pandas.api.types.is_list_like(s.iloc[0])) - or pandas.api.types.is_datetime64_any_dtype(s) - ) - for _, s in pandas_dataframe.items() - ) - ): - return self._read_pandas_inline(pandas_dataframe) + inline_df = self._read_pandas_inline(pandas_dataframe) + if inline_df is not None: + return inline_df return self._read_pandas_load_job(pandas_dataframe, api_name) def _read_pandas_inline( self, pandas_dataframe: pandas.DataFrame - ) -> dataframe.DataFrame: - return dataframe.DataFrame(blocks.Block.from_local(pandas_dataframe, self)) + ) -> Optional[dataframe.DataFrame]: + import bigframes.dataframe as dataframe + + if pandas_dataframe.size > MAX_INLINE_DF_SIZE: + return None + + try: + inline_df = dataframe.DataFrame( + blocks.Block.from_local(pandas_dataframe, self) + ) + except ValueError: # Thrown by ibis for some unhandled types + return None + except pa.ArrowTypeError: # Thrown by arrow for types without mapping (geo). + return None + + inline_types = inline_df._block.expr.schema.dtypes + # Ibis has problems escaping bytes literals, which will cause syntax errors server-side. + if all(dtype in INLINABLE_DTYPES for dtype in inline_types): + return inline_df + return None def _read_pandas_load_job( self, pandas_dataframe: pandas.DataFrame, api_name: str ) -> dataframe.DataFrame: + import bigframes.dataframe as dataframe + + col_index = pandas_dataframe.columns.copy() col_labels, idx_labels = ( - pandas_dataframe.columns.to_list(), + col_index.to_list(), pandas_dataframe.index.names, ) - new_col_ids, new_idx_ids = utils.get_standardized_ids(col_labels, idx_labels) + new_col_ids, new_idx_ids = utils.get_standardized_ids( + col_labels, + idx_labels, + # Loading parquet files into BigQuery with special column names + # is only supported under an allowlist. 
+ strict=True, + ) # Add order column to pandas DataFrame to preserve order in BigQuery ordering_col = "rowid" @@ -969,7 +1067,7 @@ def _read_pandas_load_job( # Specify the datetime dtypes, which is auto-detected as timestamp types. schema: list[bigquery.SchemaField] = [] - for column, dtype in zip(pandas_dataframe.columns, pandas_dataframe.dtypes): + for column, dtype in zip(new_col_ids, pandas_dataframe.dtypes): if dtype == "timestamp[us][pyarrow]": schema.append( bigquery.SchemaField(column, bigquery.enums.SqlTypeNames.DATETIME) @@ -990,8 +1088,8 @@ def _read_pandas_load_job( ) self._start_generic_job(load_job) - ordering = orderings.ExpressionOrdering( - ordering_value_columns=tuple([OrderingColumnReference(ordering_col)]), + ordering = order.ExpressionOrdering( + ordering_value_columns=tuple([order.ascending_over(ordering_col)]), total_ordering_columns=frozenset([ordering_col]), integer_encoding=IntegerEncoding(True, is_sequential=True), ) @@ -1023,7 +1121,7 @@ def _read_pandas_load_job( block = blocks.Block( array_value, index_columns=new_idx_ids, - column_labels=col_labels, + column_labels=col_index, index_labels=idx_labels, ) return dataframe.DataFrame(block) @@ -1346,9 +1444,9 @@ def _create_total_ordering( itertools.chain(original_column_ids, [full_row_hash, random_value]) ) - ordering_ref1 = core.OrderingColumnReference(ordering_hash_part) - ordering_ref2 = core.OrderingColumnReference(ordering_rand_part) - ordering = orderings.ExpressionOrdering( + ordering_ref1 = order.ascending_over(ordering_hash_part) + ordering_ref2 = order.ascending_over(ordering_rand_part) + ordering = order.ExpressionOrdering( ordering_value_columns=(ordering_ref1, ordering_ref2), total_ordering_columns=frozenset([ordering_hash_part, ordering_rand_part]), ) @@ -1626,13 +1724,14 @@ def _start_query( sql: str, job_config: Optional[bigquery.job.QueryJobConfig] = None, max_results: Optional[int] = None, + timeout: Optional[float] = None, ) -> Tuple[bigquery.table.RowIterator, 
bigquery.QueryJob]: """ Starts BigQuery query job and waits for results. """ job_config = self._prepare_query_job_config(job_config) return bigframes.session._io.bigquery.start_query_with_client( - self.bqclient, sql, job_config, max_results + self.bqclient, sql, job_config, max_results, timeout ) def _start_query_ml_ddl( @@ -1712,7 +1811,7 @@ def _cache_with_offsets(self, array_value: core.ArrayValue) -> core.ArrayValue: table_expression, columns=new_columns, hidden_ordering_columns=new_hidden_columns, - ordering=orderings.ExpressionOrdering.from_offset_col("bigframes_offsets"), + ordering=order.ExpressionOrdering.from_offset_col("bigframes_offsets"), ) def _is_trivially_executable(self, array_value: core.ArrayValue): @@ -1790,8 +1889,10 @@ def _get_table_size(self, destination_table): def _rows_to_dataframe( self, row_iterator: bigquery.table.RowIterator, dtypes: Dict ) -> pandas.DataFrame: + # Can ignore inferred datatype until dtype emulation breaks 1:1 mapping between BQ types and bigframes types + dtypes_from_bq = bigframes.dtypes.bf_type_from_type_kind(row_iterator.schema) arrow_table = row_iterator.to_arrow() - return bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes) + return bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes_from_bq) def _start_generic_job(self, job: formatting_helpers.GenericJob): if bigframes.options.display.progress_bar is not None: @@ -1846,3 +1947,25 @@ def _convert_to_nonnull_string(column: ibis_types.Column) -> ibis_types.StringVa # Escape backslashes and use backslash as delineator escaped = typing.cast(ibis_types.StringColumn, result.fillna("")).replace("\\", "\\\\") # type: ignore return typing.cast(ibis_types.StringColumn, ibis.literal("\\")).concat(escaped) + + +def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict: + """ + For backwards-compatibility, convert any previously client-side only + parameters such as timeoutMs to the property name expected by the REST API. 
+ + Makes a copy of configuration if changes are needed. + """ + + if configuration is None: + return {} + + timeout_ms = configuration.get("query", {}).get("timeoutMs") + if timeout_ms is not None: + # Transform timeoutMs to an actual server-side configuration. + # https://github.com/googleapis/python-bigquery-pandas/issues/479 + configuration = copy.deepcopy(configuration) + del configuration["query"]["timeoutMs"] + configuration["jobTimeoutMs"] = timeout_ms + + return configuration diff --git a/bigframes/session/_io/bigquery.py b/bigframes/session/_io/bigquery.py index 67820bbbcb..75283a060a 100644 --- a/bigframes/session/_io/bigquery.py +++ b/bigframes/session/_io/bigquery.py @@ -29,7 +29,6 @@ import bigframes from bigframes.core import log_adapter import bigframes.formatting_helpers as formatting_helpers -import bigframes.session._io.bigquery as bigframes_io IO_ORDERING_ID = "bqdf_row_nums" MAX_LABELS_COUNT = 64 @@ -220,17 +219,18 @@ def start_query_with_client( sql: str, job_config: bigquery.job.QueryJobConfig, max_results: Optional[int] = None, + timeout: Optional[float] = None, ) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]: """ Starts query job and waits for results. """ api_methods = log_adapter.get_and_reset_api_methods() - job_config.labels = bigframes_io.create_job_configs_labels( + job_config.labels = create_job_configs_labels( job_configs_labels=job_config.labels, api_methods=api_methods ) try: - query_job = bq_client.query(sql, job_config=job_config) + query_job = bq_client.query(sql, job_config=job_config, timeout=timeout) except google.api_core.exceptions.Forbidden as ex: if "Drive credentials" in ex.message: ex.message += "\nCheck https://cloud.google.com/bigquery/docs/query-drive-data#Google_Drive_permissions." 
diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 7574aa4454..d97e53901d 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -38,7 +38,13 @@ _SCOPES = ["https://www.googleapis.com/auth/cloud-platform"] # Regions for which Regional Endpoints (REPs) are supported -_REP_SUPPORTED_REGIONS = {"me-central2"} +_REP_SUPPORTED_REGIONS = { + "me-central2", + "europe-west9", + "europe-west3", + "us-east4", + "us-west1", +} # BigQuery is a REST API, which requires the protocol as part of the URL. diff --git a/bigframes/version.py b/bigframes/version.py index 8066f4353a..8e31592250 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.26.0" +__version__ = "1.0.0" diff --git a/mypy.ini b/mypy.ini index 3809f8e241..5707f14154 100644 --- a/mypy.ini +++ b/mypy.ini @@ -29,3 +29,6 @@ ignore_missing_imports = True [mypy-ipywidgets] ignore_missing_imports = True + +[mypy-pyarrow.feather] +ignore_missing_imports = True diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index d6d819f9e3..61445d85c5 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -139,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -289,7 +289,7 @@ { "data": { "text/html": [ - "Query job 313ed696-37fc-46b3-806e-6041403080d3 is DONE. 2.3 GB processed. Open Job" + "Query job d5778724-6966-42ba-b8a6-2a1865a1184c is DONE. 2.3 GB processed. Open Job" ], "text/plain": [ "" @@ -301,19 +301,7 @@ { "data": { "text/html": [ - "Query job f149ff34-4807-4cba-841f-fb7bf51bbbd6 is DONE. 0 Bytes processed. 
Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 8ed05179-52b2-437d-a709-f651a80de307 is DONE. 4.6 kB processed. Open Job" + "Query job 4d48bf69-571c-4773-8486-0232840597d5 is DONE. 55.1 MB processed. Open Job" ], "text/plain": [ "" @@ -348,39 +336,36 @@ " \n", " \n", " \n", - " 2\n", - " COLLECTION BUREAU OF AMERICA ACCOUNT NO. XXXX...\n", + " 24\n", + " I sent disputed to Transunion, XXXX and XXXX f...\n", " \n", " \n", - " 3\n", - " Despite multiple written requests, the unverif...\n", + " 942\n", + " on XX/XX/2017 I sent XXXX, transunion, XXXX pr...\n", " \n", " \n", - " 6\n", - " Once again you guys have not provided me with ...\n", + " 1193\n", + " On Wednesday, XXXX XXXX , I initiated a wir...\n", " \n", " \n", - " 9\n", - " XX/XX/XXXX {$350.00} I received a outstating d...\n", + " 1292\n", + " Dear Sir or Madam, I am a victim of identity t...\n", " \n", " \n", - " 10\n", - " Im am unable to withdraw money from my account...\n", + " 1377\n", + " For the purpose of this complaint, I will refe...\n", " \n", " \n", "\n", - "

5 rows × 1 columns

\n", - "[5 rows x 1 columns in total]" + "" ], "text/plain": [ - " consumer_complaint_narrative\n", - "2 COLLECTION BUREAU OF AMERICA ACCOUNT NO. XXXX...\n", - "3 Despite multiple written requests, the unverif...\n", - "6 Once again you guys have not provided me with ...\n", - "9 XX/XX/XXXX {$350.00} I received a outstating d...\n", - "10 Im am unable to withdraw money from my account...\n", - "\n", - "[5 rows x 1 columns]" + " consumer_complaint_narrative\n", + "24 I sent disputed to Transunion, XXXX and XXXX f...\n", + "942 on XX/XX/2017 I sent XXXX, transunion, XXXX pr...\n", + "1193 On Wednesday, XXXX XXXX , I initiated a wir...\n", + "1292 Dear Sir or Madam, I am a victim of identity t...\n", + "1377 For the purpose of this complaint, I will refe..." ] }, "execution_count": 7, @@ -390,7 +375,7 @@ ], "source": [ "issues_df = input_df[[\"consumer_complaint_narrative\"]].dropna()\n", - "issues_df.head(n=5) # View the first five complaints" + "issues_df.peek(n=5) # View an arbitrary five complaints" ] }, { @@ -433,7 +418,7 @@ { "data": { "text/html": [ - "Query job bd6b88fc-6e05-4d71-acb1-d5befaced079 is DONE. 0 Bytes processed. Open Job" + "Query job 15b352c2-783c-42b1-bc03-e5772f00381a is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -459,7 +444,7 @@ { "data": { "text/html": [ - "Query job a4d2983a-7967-4ffb-b2b7-2a387f58776b is DONE. 2.3 GB processed. Open Job" + "Query job e2152e81-b736-4a68-a25a-c5eb2b03d734 is DONE. 1.3 GB processed. Open Job" ], "text/plain": [ "" @@ -471,7 +456,7 @@ { "data": { "text/html": [ - "Query job 619b0cc2-4162-44ab-a085-e7cc5c48a02b is DONE. 80.0 kB processed. Open Job" + "Query job b1a3d20b-aee3-424c-a0c5-5b36f1177709 is DONE. 80.0 kB processed. Open Job" ], "text/plain": [ "" @@ -483,7 +468,7 @@ { "data": { "text/html": [ - "Query job 6dc5b3cf-efa6-4350-907e-ab40e3de80aa is DONE. 20.0 kB processed. Open Job" + "Query job 6b2fad50-cbc8-42ea-83c1-b5d3eaac10b9 is DONE. 20.0 kB processed. 
Open Job" ], "text/plain": [ "" @@ -495,7 +480,349 @@ { "data": { "text/html": [ - "Query job 70f25b9e-2d26-4dc2-9d1f-c24d36e58856 is DONE. 72.0 MB processed. Open Job" + "Query job 31896ae6-fbb5-42fb-98c4-13bd19d1adfa is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 43f04543-f59b-4f1b-8598-c529324904be is DONE. 72.1 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
text_embeddingstatisticsml_embed_text_statuscontent
545[ 1.82510037e-02 -1.27867460e-02 -1.57095697e-...{\"token_count\":178,\"truncated\":false}My payments have been approximately {$89.00} w...
614[ 5.40032536e-02 -5.28502129e-02 -5.33268750e-...{\"token_count\":399,\"truncated\":false}Hi, I have contacted Trans Union XXXX XXXX abo...
1236[-5.32836001e-03 -5.84292673e-02 -5.86670786e-...{\"token_count\":129,\"truncated\":false}I have a XXXX XXXX XXXX credit card on my Exp...
1477[ 3.02605387e-02 -4.37121317e-02 -2.70802993e-...{\"token_count\":16,\"truncated\":false}Wrongs information, selling my information to ...
2261[ 2.35723313e-02 -3.73509154e-02 -6.44604117e-...{\"token_count\":33,\"truncated\":false}Please investigate and delete disputed item th...
2361[ 1.04440488e-02 -9.37070698e-03 -7.36323372e-...{\"token_count\":45,\"truncated\":false}By the provisions of the Fair Credit Reporting...
2378[ 3.04989032e-02 -4.08191867e-02 -6.18648790e-...{\"token_count\":892,\"truncated\":false}Since XX/XX/XXXX I have been trying to dispute...
3133[ 0.00152804 -0.04189068 -0.04220504 -0.053740...{\"token_count\":90,\"truncated\":false}Out of the blue I received a debt collection n...
3140[ 3.11435573e-02 -4.44000624e-02 -2.10917685e-...{\"token_count\":372,\"truncated\":false}My wife and I have been sending money to XXXX ...
3322[ 2.75927987e-02 -6.23729872e-03 -3.83295454e-...{\"token_count\":36,\"truncated\":false}Phone calls from Convergent Outsourcing XXXX. ...
3583[ 9.20385588e-03 -3.83387171e-02 -6.46291822e-...{\"token_count\":52,\"truncated\":false}I recently received a copy of my credit report...
4134[-7.04960374e-04 -3.52595337e-02 -1.65264793e-...{\"token_count\":412,\"truncated\":false}I have been sending the creditor what they hav...
4496[ 3.67735326e-02 1.21120387e-03 -5.20942472e-...{\"token_count\":182,\"truncated\":false}This is my second complaint. Their response to...
5260[ 2.07133405e-02 -1.69602726e-02 -5.07124476e-...{\"token_count\":103,\"truncated\":false}XX/XX/XXXX and XX/XX/XXXX, {$3200.00} contacte...
5400[ 1.44114876e-02 -2.34710164e-02 -6.58538565e-...{\"token_count\":60,\"truncated\":false}Upon checking my XXXX credit report I noticed ...
5425[ 3.10326386e-02 -2.19427086e-02 -6.56386837e-...{\"token_count\":87,\"truncated\":false}Follow up to previous complaint XXXX XXXX XXXX...
6014[ 1.90773793e-02 -2.27493346e-02 -3.27166244e-...{\"token_count\":175,\"truncated\":false}My new XXXX lease was over always paid on time...
8192[ 0.01937891 -0.05466933 -0.06070872 -0.059028...{\"token_count\":131,\"truncated\":false}I have no idea where this account cane from. B...
8240[ 4.34123818e-03 -3.40953320e-02 -4.06381376e-...{\"token_count\":87,\"truncated\":false}I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL F...
8720[ 0.03133732 -0.03972461 -0.00178199 -0.035876...{\"token_count\":645,\"truncated\":false}XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consum...
8914[ 1.75969116e-02 -2.25022305e-02 -5.70390299e-...{\"token_count\":180,\"truncated\":false}On XX/XX/21 I sent a letter regarding inaccura...
10021[ 5.02460636e-02 -5.25112189e-02 -4.12914790e-...{\"token_count\":30,\"truncated\":false}XX/XX/XXXX and XX/XX/XXXX inaccurate informati...
10327[-0.00979626 -0.04912931 -0.08654705 -0.021063...{\"token_count\":194,\"truncated\":false}When I reviewed my credit report, I discovered...
10345[-0.04292191 -0.02636929 -0.06177032 -0.076520...{\"token_count\":262,\"truncated\":false}U.S. Bank sent two letters containing Visa Deb...
10369[ 2.16020197e-02 -5.62509745e-02 -5.93873672e-...{\"token_count\":77,\"truncated\":false}I requested from XXXX that they reverse the la...
\n", + "

25 rows × 4 columns

\n", + "
[10000 rows x 4 columns in total]" + ], + "text/plain": [ + " text_embedding \\\n", + "545 [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-... \n", + "614 [ 5.40032536e-02 -5.28502129e-02 -5.33268750e-... \n", + "1236 [-5.32836001e-03 -5.84292673e-02 -5.86670786e-... \n", + "1477 [ 3.02605387e-02 -4.37121317e-02 -2.70802993e-... \n", + "2261 [ 2.35723313e-02 -3.73509154e-02 -6.44604117e-... \n", + "2361 [ 1.04440488e-02 -9.37070698e-03 -7.36323372e-... \n", + "2378 [ 3.04989032e-02 -4.08191867e-02 -6.18648790e-... \n", + "3133 [ 0.00152804 -0.04189068 -0.04220504 -0.053740... \n", + "3140 [ 3.11435573e-02 -4.44000624e-02 -2.10917685e-... \n", + "3322 [ 2.75927987e-02 -6.23729872e-03 -3.83295454e-... \n", + "3583 [ 9.20385588e-03 -3.83387171e-02 -6.46291822e-... \n", + "4134 [-7.04960374e-04 -3.52595337e-02 -1.65264793e-... \n", + "4496 [ 3.67735326e-02 1.21120387e-03 -5.20942472e-... \n", + "5260 [ 2.07133405e-02 -1.69602726e-02 -5.07124476e-... \n", + "5400 [ 1.44114876e-02 -2.34710164e-02 -6.58538565e-... \n", + "5425 [ 3.10326386e-02 -2.19427086e-02 -6.56386837e-... \n", + "6014 [ 1.90773793e-02 -2.27493346e-02 -3.27166244e-... \n", + "8192 [ 0.01937891 -0.05466933 -0.06070872 -0.059028... \n", + "8240 [ 4.34123818e-03 -3.40953320e-02 -4.06381376e-... \n", + "8720 [ 0.03133732 -0.03972461 -0.00178199 -0.035876... \n", + "8914 [ 1.75969116e-02 -2.25022305e-02 -5.70390299e-... \n", + "10021 [ 5.02460636e-02 -5.25112189e-02 -4.12914790e-... \n", + "10327 [-0.00979626 -0.04912931 -0.08654705 -0.021063... \n", + "10345 [-0.04292191 -0.02636929 -0.06177032 -0.076520... \n", + "10369 [ 2.16020197e-02 -5.62509745e-02 -5.93873672e-... 
\n", + "\n", + " statistics ml_embed_text_status \\\n", + "545 {\"token_count\":178,\"truncated\":false} \n", + "614 {\"token_count\":399,\"truncated\":false} \n", + "1236 {\"token_count\":129,\"truncated\":false} \n", + "1477 {\"token_count\":16,\"truncated\":false} \n", + "2261 {\"token_count\":33,\"truncated\":false} \n", + "2361 {\"token_count\":45,\"truncated\":false} \n", + "2378 {\"token_count\":892,\"truncated\":false} \n", + "3133 {\"token_count\":90,\"truncated\":false} \n", + "3140 {\"token_count\":372,\"truncated\":false} \n", + "3322 {\"token_count\":36,\"truncated\":false} \n", + "3583 {\"token_count\":52,\"truncated\":false} \n", + "4134 {\"token_count\":412,\"truncated\":false} \n", + "4496 {\"token_count\":182,\"truncated\":false} \n", + "5260 {\"token_count\":103,\"truncated\":false} \n", + "5400 {\"token_count\":60,\"truncated\":false} \n", + "5425 {\"token_count\":87,\"truncated\":false} \n", + "6014 {\"token_count\":175,\"truncated\":false} \n", + "8192 {\"token_count\":131,\"truncated\":false} \n", + "8240 {\"token_count\":87,\"truncated\":false} \n", + "8720 {\"token_count\":645,\"truncated\":false} \n", + "8914 {\"token_count\":180,\"truncated\":false} \n", + "10021 {\"token_count\":30,\"truncated\":false} \n", + "10327 {\"token_count\":194,\"truncated\":false} \n", + "10345 {\"token_count\":262,\"truncated\":false} \n", + "10369 {\"token_count\":77,\"truncated\":false} \n", + "\n", + " content \n", + "545 My payments have been approximately {$89.00} w... \n", + "614 Hi, I have contacted Trans Union XXXX XXXX abo... \n", + "1236 I have a XXXX XXXX XXXX credit card on my Exp... \n", + "1477 Wrongs information, selling my information to ... \n", + "2261 Please investigate and delete disputed item th... \n", + "2361 By the provisions of the Fair Credit Reporting... \n", + "2378 Since XX/XX/XXXX I have been trying to dispute... \n", + "3133 Out of the blue I received a debt collection n... 
\n", + "3140 My wife and I have been sending money to XXXX ... \n", + "3322 Phone calls from Convergent Outsourcing XXXX. ... \n", + "3583 I recently received a copy of my credit report... \n", + "4134 I have been sending the creditor what they hav... \n", + "4496 This is my second complaint. Their response to... \n", + "5260 XX/XX/XXXX and XX/XX/XXXX, {$3200.00} contacte... \n", + "5400 Upon checking my XXXX credit report I noticed ... \n", + "5425 Follow up to previous complaint XXXX XXXX XXXX... \n", + "6014 My new XXXX lease was over always paid on time... \n", + "8192 I have no idea where this account cane from. B... \n", + "8240 I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL F... \n", + "8720 XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consum... \n", + "8914 On XX/XX/21 I sent a letter regarding inaccura... \n", + "10021 XX/XX/XXXX and XX/XX/XXXX inaccurate informati... \n", + "10327 When I reviewed my credit report, I discovered... \n", + "10345 U.S. Bank sent two letters containing Visa Deb... \n", + "10369 I requested from XXXX that they reverse the la... \n", + "...\n", + "\n", + "[10000 rows x 4 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Will take ~3 minutes to compute the embeddings\n", + "predicted_embeddings = model.predict(downsampled_issues_df)\n", + "# Notice the lists of numbers that are our text embeddings for each complaint\n", + "predicted_embeddings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The model may have encountered errors while calculating embeddings for some rows. Filter out the errored rows before training the model. Alternatively, select these rows and retry the embeddings." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job c78e1040-2a57-42f6-8fdb-5b9524846259 is DONE. 72.1 MB processed. 
Open Job" ], "text/plain": [ "" @@ -507,7 +834,7 @@ { "data": { "text/html": [ - "Query job 2b2f366b-b398-4817-a610-cf71c64a8349 is DONE. 0 Bytes processed. Open Job" + "Query job 0986541b-3941-4387-b813-8888f53d149e is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -519,7 +846,7 @@ { "data": { "text/html": [ - "Query job b0e71a63-1365-4fc5-a764-b25b62387fd1 is DONE. 36.1 kB processed. Open Job" + "Query job 754aadd2-fee6-495c-acef-506f4e13c062 is DONE. 72.6 MB processed. Open Job" ], "text/plain": [ "" @@ -557,80 +884,285 @@ " \n", " \n", " \n", - " 251\n", - " [ 2.20562406e-02 -3.51827666e-02 7.63384486e-...\n", - " {\"token_count\":145,\"truncated\":false}\n", + " 545\n", + " [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-...\n", + " {\"token_count\":178,\"truncated\":false}\n", " \n", - " A purse was purchased from XXXX XXXX on XX/XX/...\n", + " My payments have been approximately {$89.00} w...\n", " \n", " \n", - " 300\n", - " [ 0.01977486 -0.04289974 -0.05289588 -0.027267...\n", - " {\"token_count\":498,\"truncated\":false}\n", + " 614\n", + " [ 5.40032536e-02 -5.28502129e-02 -5.33268750e-...\n", + " {\"token_count\":399,\"truncated\":false}\n", " \n", - " XXXX XXXX XXXXXXXX has reported on my credit r...\n", + " Hi, I have contacted Trans Union XXXX XXXX abo...\n", " \n", " \n", - " 414\n", - " [ 1.37719307e-02 -4.15441953e-02 -7.81692266e-...\n", - " {\"token_count\":263,\"truncated\":false}\n", + " 1236\n", + " [-5.32836001e-03 -5.84292673e-02 -5.86670786e-...\n", + " {\"token_count\":129,\"truncated\":false}\n", " \n", - " I have tried to dispute US BKPT CT TX XXXXXXXX...\n", + " I have a XXXX XXXX XXXX credit card on my Exp...\n", " \n", " \n", - " 493\n", - " [ 4.48844060e-02 -1.40293539e-02 -3.46709713e-...\n", - " {\"token_count\":395,\"truncated\":false}\n", + " 1477\n", + " [ 3.02605387e-02 -4.37121317e-02 -2.70802993e-...\n", + " {\"token_count\":16,\"truncated\":false}\n", " \n", - " Discover Student Loan has been holding onto 
{$...\n", + " Wrongs information, selling my information to ...\n", " \n", " \n", - " 545\n", - " [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-...\n", - " {\"token_count\":178,\"truncated\":false}\n", + " 2261\n", + " [ 2.35723313e-02 -3.73509154e-02 -6.44604117e-...\n", + " {\"token_count\":33,\"truncated\":false}\n", " \n", - " My payments have been approximately {$89.00} w...\n", + " Please investigate and delete disputed item th...\n", + " \n", + " \n", + " 2361\n", + " [ 1.04440488e-02 -9.37070698e-03 -7.36323372e-...\n", + " {\"token_count\":45,\"truncated\":false}\n", + " \n", + " By the provisions of the Fair Credit Reporting...\n", + " \n", + " \n", + " 2378\n", + " [ 3.04989032e-02 -4.08191867e-02 -6.18648790e-...\n", + " {\"token_count\":892,\"truncated\":false}\n", + " \n", + " Since XX/XX/XXXX I have been trying to dispute...\n", + " \n", + " \n", + " 3133\n", + " [ 0.00152804 -0.04189068 -0.04220504 -0.053740...\n", + " {\"token_count\":90,\"truncated\":false}\n", + " \n", + " Out of the blue I received a debt collection n...\n", + " \n", + " \n", + " 3140\n", + " [ 3.11435573e-02 -4.44000624e-02 -2.10917685e-...\n", + " {\"token_count\":372,\"truncated\":false}\n", + " \n", + " My wife and I have been sending money to XXXX ...\n", + " \n", + " \n", + " 3322\n", + " [ 2.75927987e-02 -6.23729872e-03 -3.83295454e-...\n", + " {\"token_count\":36,\"truncated\":false}\n", + " \n", + " Phone calls from Convergent Outsourcing XXXX. 
...\n", + " \n", + " \n", + " 3583\n", + " [ 9.20385588e-03 -3.83387171e-02 -6.46291822e-...\n", + " {\"token_count\":52,\"truncated\":false}\n", + " \n", + " I recently received a copy of my credit report...\n", + " \n", + " \n", + " 4134\n", + " [-7.04960374e-04 -3.52595337e-02 -1.65264793e-...\n", + " {\"token_count\":412,\"truncated\":false}\n", + " \n", + " I have been sending the creditor what they hav...\n", + " \n", + " \n", + " 4496\n", + " [ 3.67735326e-02 1.21120387e-03 -5.20942472e-...\n", + " {\"token_count\":182,\"truncated\":false}\n", + " \n", + " This is my second complaint. Their response to...\n", + " \n", + " \n", + " 5260\n", + " [ 2.07133405e-02 -1.69602726e-02 -5.07124476e-...\n", + " {\"token_count\":103,\"truncated\":false}\n", + " \n", + " XX/XX/XXXX and XX/XX/XXXX, {$3200.00} contacte...\n", + " \n", + " \n", + " 5400\n", + " [ 1.44114876e-02 -2.34710164e-02 -6.58538565e-...\n", + " {\"token_count\":60,\"truncated\":false}\n", + " \n", + " Upon checking my XXXX credit report I noticed ...\n", + " \n", + " \n", + " 5425\n", + " [ 3.10326386e-02 -2.19427086e-02 -6.56386837e-...\n", + " {\"token_count\":87,\"truncated\":false}\n", + " \n", + " Follow up to previous complaint XXXX XXXX XXXX...\n", + " \n", + " \n", + " 6014\n", + " [ 1.90773793e-02 -2.27493346e-02 -3.27166244e-...\n", + " {\"token_count\":175,\"truncated\":false}\n", + " \n", + " My new XXXX lease was over always paid on time...\n", + " \n", + " \n", + " 8192\n", + " [ 0.01937891 -0.05466933 -0.06070872 -0.059028...\n", + " {\"token_count\":131,\"truncated\":false}\n", + " \n", + " I have no idea where this account cane from. 
B...\n", + " \n", + " \n", + " 8240\n", + " [ 4.34123818e-03 -3.40953320e-02 -4.06381376e-...\n", + " {\"token_count\":87,\"truncated\":false}\n", + " \n", + " I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL F...\n", + " \n", + " \n", + " 8720\n", + " [ 0.03133732 -0.03972461 -0.00178199 -0.035876...\n", + " {\"token_count\":645,\"truncated\":false}\n", + " \n", + " XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consum...\n", + " \n", + " \n", + " 8914\n", + " [ 1.75969116e-02 -2.25022305e-02 -5.70390299e-...\n", + " {\"token_count\":180,\"truncated\":false}\n", + " \n", + " On XX/XX/21 I sent a letter regarding inaccura...\n", + " \n", + " \n", + " 10021\n", + " [ 5.02460636e-02 -5.25112189e-02 -4.12914790e-...\n", + " {\"token_count\":30,\"truncated\":false}\n", + " \n", + " XX/XX/XXXX and XX/XX/XXXX inaccurate informati...\n", + " \n", + " \n", + " 10327\n", + " [-0.00979626 -0.04912931 -0.08654705 -0.021063...\n", + " {\"token_count\":194,\"truncated\":false}\n", + " \n", + " When I reviewed my credit report, I discovered...\n", + " \n", + " \n", + " 10345\n", + " [-0.04292191 -0.02636929 -0.06177032 -0.076520...\n", + " {\"token_count\":262,\"truncated\":false}\n", + " \n", + " U.S. Bank sent two letters containing Visa Deb...\n", + " \n", + " \n", + " 10369\n", + " [ 2.16020197e-02 -5.62509745e-02 -5.93873672e-...\n", + " {\"token_count\":77,\"truncated\":false}\n", + " \n", + " I requested from XXXX that they reverse the la...\n", " \n", " \n", "\n", - "

5 rows × 4 columns

\n", - "[5 rows x 4 columns in total]" + "

25 rows × 4 columns

\n", + "[10000 rows x 4 columns in total]" ], "text/plain": [ - " text_embedding \\\n", - "251 [ 2.20562406e-02 -3.51827666e-02 7.63384486e-... \n", - "300 [ 0.01977486 -0.04289974 -0.05289588 -0.027267... \n", - "414 [ 1.37719307e-02 -4.15441953e-02 -7.81692266e-... \n", - "493 [ 4.48844060e-02 -1.40293539e-02 -3.46709713e-... \n", - "545 [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-... \n", + " text_embedding \\\n", + "545 [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-... \n", + "614 [ 5.40032536e-02 -5.28502129e-02 -5.33268750e-... \n", + "1236 [-5.32836001e-03 -5.84292673e-02 -5.86670786e-... \n", + "1477 [ 3.02605387e-02 -4.37121317e-02 -2.70802993e-... \n", + "2261 [ 2.35723313e-02 -3.73509154e-02 -6.44604117e-... \n", + "2361 [ 1.04440488e-02 -9.37070698e-03 -7.36323372e-... \n", + "2378 [ 3.04989032e-02 -4.08191867e-02 -6.18648790e-... \n", + "3133 [ 0.00152804 -0.04189068 -0.04220504 -0.053740... \n", + "3140 [ 3.11435573e-02 -4.44000624e-02 -2.10917685e-... \n", + "3322 [ 2.75927987e-02 -6.23729872e-03 -3.83295454e-... \n", + "3583 [ 9.20385588e-03 -3.83387171e-02 -6.46291822e-... \n", + "4134 [-7.04960374e-04 -3.52595337e-02 -1.65264793e-... \n", + "4496 [ 3.67735326e-02 1.21120387e-03 -5.20942472e-... \n", + "5260 [ 2.07133405e-02 -1.69602726e-02 -5.07124476e-... \n", + "5400 [ 1.44114876e-02 -2.34710164e-02 -6.58538565e-... \n", + "5425 [ 3.10326386e-02 -2.19427086e-02 -6.56386837e-... \n", + "6014 [ 1.90773793e-02 -2.27493346e-02 -3.27166244e-... \n", + "8192 [ 0.01937891 -0.05466933 -0.06070872 -0.059028... \n", + "8240 [ 4.34123818e-03 -3.40953320e-02 -4.06381376e-... \n", + "8720 [ 0.03133732 -0.03972461 -0.00178199 -0.035876... \n", + "8914 [ 1.75969116e-02 -2.25022305e-02 -5.70390299e-... \n", + "10021 [ 5.02460636e-02 -5.25112189e-02 -4.12914790e-... \n", + "10327 [-0.00979626 -0.04912931 -0.08654705 -0.021063... \n", + "10345 [-0.04292191 -0.02636929 -0.06177032 -0.076520... \n", + "10369 [ 2.16020197e-02 -5.62509745e-02 -5.93873672e-... 
\n", "\n", - " statistics ml_embed_text_status \\\n", - "251 {\"token_count\":145,\"truncated\":false} \n", - "300 {\"token_count\":498,\"truncated\":false} \n", - "414 {\"token_count\":263,\"truncated\":false} \n", - "493 {\"token_count\":395,\"truncated\":false} \n", - "545 {\"token_count\":178,\"truncated\":false} \n", + " statistics ml_embed_text_status \\\n", + "545 {\"token_count\":178,\"truncated\":false} \n", + "614 {\"token_count\":399,\"truncated\":false} \n", + "1236 {\"token_count\":129,\"truncated\":false} \n", + "1477 {\"token_count\":16,\"truncated\":false} \n", + "2261 {\"token_count\":33,\"truncated\":false} \n", + "2361 {\"token_count\":45,\"truncated\":false} \n", + "2378 {\"token_count\":892,\"truncated\":false} \n", + "3133 {\"token_count\":90,\"truncated\":false} \n", + "3140 {\"token_count\":372,\"truncated\":false} \n", + "3322 {\"token_count\":36,\"truncated\":false} \n", + "3583 {\"token_count\":52,\"truncated\":false} \n", + "4134 {\"token_count\":412,\"truncated\":false} \n", + "4496 {\"token_count\":182,\"truncated\":false} \n", + "5260 {\"token_count\":103,\"truncated\":false} \n", + "5400 {\"token_count\":60,\"truncated\":false} \n", + "5425 {\"token_count\":87,\"truncated\":false} \n", + "6014 {\"token_count\":175,\"truncated\":false} \n", + "8192 {\"token_count\":131,\"truncated\":false} \n", + "8240 {\"token_count\":87,\"truncated\":false} \n", + "8720 {\"token_count\":645,\"truncated\":false} \n", + "8914 {\"token_count\":180,\"truncated\":false} \n", + "10021 {\"token_count\":30,\"truncated\":false} \n", + "10327 {\"token_count\":194,\"truncated\":false} \n", + "10345 {\"token_count\":262,\"truncated\":false} \n", + "10369 {\"token_count\":77,\"truncated\":false} \n", "\n", - " content \n", - "251 A purse was purchased from XXXX XXXX on XX/XX/... \n", - "300 XXXX XXXX XXXXXXXX has reported on my credit r... \n", - "414 I have tried to dispute US BKPT CT TX XXXXXXXX... \n", - "493 Discover Student Loan has been holding onto {$... 
\n", - "545 My payments have been approximately {$89.00} w... \n", + " content \n", + "545 My payments have been approximately {$89.00} w... \n", + "614 Hi, I have contacted Trans Union XXXX XXXX abo... \n", + "1236 I have a XXXX XXXX XXXX credit card on my Exp... \n", + "1477 Wrongs information, selling my information to ... \n", + "2261 Please investigate and delete disputed item th... \n", + "2361 By the provisions of the Fair Credit Reporting... \n", + "2378 Since XX/XX/XXXX I have been trying to dispute... \n", + "3133 Out of the blue I received a debt collection n... \n", + "3140 My wife and I have been sending money to XXXX ... \n", + "3322 Phone calls from Convergent Outsourcing XXXX. ... \n", + "3583 I recently received a copy of my credit report... \n", + "4134 I have been sending the creditor what they hav... \n", + "4496 This is my second complaint. Their response to... \n", + "5260 XX/XX/XXXX and XX/XX/XXXX, {$3200.00} contacte... \n", + "5400 Upon checking my XXXX credit report I noticed ... \n", + "5425 Follow up to previous complaint XXXX XXXX XXXX... \n", + "6014 My new XXXX lease was over always paid on time... \n", + "8192 I have no idea where this account cane from. B... \n", + "8240 I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL F... \n", + "8720 XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consum... \n", + "8914 On XX/XX/21 I sent a letter regarding inaccura... \n", + "10021 XX/XX/XXXX and XX/XX/XXXX inaccurate informati... \n", + "10327 When I reviewed my credit report, I discovered... \n", + "10345 U.S. Bank sent two letters containing Visa Deb... \n", + "10369 I requested from XXXX that they reverse the la... 
\n", + "...\n", "\n", - "[5 rows x 4 columns]" + "[10000 rows x 4 columns]" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Will take ~3 minutes to compute the embeddings\n", - "predicted_embeddings = model.predict(downsampled_issues_df)\n", - "# Notice the lists of numbers that are our text embeddings for each complaint\n", - "predicted_embeddings.head() " + "successful_rows = (\n", + " (predicted_embeddings[\"ml_embed_text_status\"] == \"\")\n", + " # Series.str.len() gives the length of an array.\n", + " # See: https://stackoverflow.com/a/41340543/101923\n", + " & (predicted_embeddings[\"text_embedding\"].str.len() != 0)\n", + ")\n", + "predicted_embeddings = predicted_embeddings[successful_rows]\n", + "predicted_embeddings\n" ] }, { @@ -653,7 +1185,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "id": "AhNTnEC5FRz2" }, @@ -674,7 +1206,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "id": "6poSxh-fGJF7" }, @@ -682,7 +1214,7 @@ { "data": { "text/html": [ - "Query job 37f432dd-9ed7-4bbd-adc1-f33b8cbab33a is DONE. 61.5 MB processed. Open Job" + "Query job fa4bbc13-3831-4c80-9b59-9939e605ed58 is DONE. 61.7 MB processed. Open Job" ], "text/plain": [ "" @@ -694,7 +1226,7 @@ { "data": { "text/html": [ - "Query job 8ca9cc5a-091a-4d4e-bcf8-04d4bfec7b6b is DONE. 0 Bytes processed. Open Job" + "Query job d2d681aa-e49a-4fda-89fd-60cf906d3aec is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -706,7 +1238,7 @@ { "data": { "text/html": [ - "Query job cdc11d15-fa78-4190-841f-18642ddb53f8 is DONE. 72.3 MB processed. Open Job" + "Query job 234bb6be-625c-4c96-baea-c37c33410114 is DONE. 72.7 MB processed. Open Job" ], "text/plain": [ "" @@ -718,7 +1250,7 @@ { "data": { "text/html": [ - "Query job 39a6ea59-0e3d-4d69-bf8a-1502b9f1a48f is DONE. 80.0 kB processed. 
Open Job" + "Query job 285817cb-99d3-426f-82c3-89d36119e8db is DONE. 80.0 kB processed. Open Job" ], "text/plain": [ "" @@ -730,31 +1262,7 @@ { "data": { "text/html": [ - "Query job 324ab354-ecbd-4bde-8f73-806856a53a19 is DONE. 73.2 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job cb7558b2-a967-491c-82db-e11116f1fba4 is DONE. 0 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job 0cfc2298-e67b-4a03-804d-f4abd8d56da0 is DONE. 36.6 kB processed. Open Job" + "Query job 3a39d2b0-55a1-4922-972a-8806b387f877 is DONE. 73.3 MB processed. Open Job" ], "text/plain": [ "" @@ -794,88 +1302,85 @@ " \n", " \n", " \n", - " 251\n", - " 2\n", - " [{'CENTROID_ID': 2, 'DISTANCE': 0.534540549592...\n", - " [ 2.20562406e-02 -3.51827666e-02 7.63384486e-...\n", - " {\"token_count\":145,\"truncated\":false}\n", + " 182250\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.570560301900...\n", + " [ 4.70298417e-02 -4.08669300e-02 -2.99868709e-...\n", + " {\"token_count\":10,\"truncated\":false}\n", " \n", - " A purse was purchased from XXXX XXXX on XX/XX/...\n", + " These are not my accounts. 
Please remove them.\n", " \n", " \n", - " 300\n", + " 3023485\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.437379245910...\n", - " [ 0.01977486 -0.04289974 -0.05289588 -0.027267...\n", - " {\"token_count\":498,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.523572693768...\n", + " [ 1.55437263e-02 -1.93240177e-02 -2.48466972e-...\n", + " {\"token_count\":10,\"truncated\":false}\n", " \n", - " XXXX XXXX XXXXXXXX has reported on my credit r...\n", + " This debt is not mine due to identity theft.\n", " \n", " \n", - " 414\n", + " 407254\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.482813493921...\n", - " [ 1.37719307e-02 -4.15441953e-02 -7.81692266e-...\n", - " {\"token_count\":263,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.515173566816...\n", + " [-0.01293471 -0.01959546 -0.02238463 -0.066214...\n", + " {\"token_count\":10,\"truncated\":false}\n", " \n", - " I have tried to dispute US BKPT CT TX XXXXXXXX...\n", + " I do not owe this company money!!!!!\n", " \n", " \n", - " 493\n", - " 9\n", - " [{'CENTROID_ID': 9, 'DISTANCE': 0.561752335987...\n", - " [ 4.48844060e-02 -1.40293539e-02 -3.46709713e-...\n", - " {\"token_count\":395,\"truncated\":false}\n", + " 1509454\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.645342721754...\n", + " [ 3.21860723e-02 -2.67103072e-02 -4.78175096e-...\n", + " {\"token_count\":10,\"truncated\":false}\n", " \n", - " Discover Student Loan has been holding onto {$...\n", + " VIOLATES HIPPA AND CRA\n", " \n", " \n", - " 545\n", - " 9\n", - " [{'CENTROID_ID': 9, 'DISTANCE': 0.540487926907...\n", - " [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-...\n", - " {\"token_count\":178,\"truncated\":false}\n", + " 2357848\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.519872186251...\n", + " [-1.88122243e-02 -2.68064123e-02 -4.69480827e-...\n", + " {\"token_count\":10,\"truncated\":false}\n", " \n", - " My payments have been approximately {$89.00} w...\n", + " Receive numerous phone calls. 
I have no debt.\n", " \n", " \n", "\n", - "

5 rows × 6 columns

\n", - "[5 rows x 6 columns in total]" + "" ], "text/plain": [ - " CENTROID_ID NEAREST_CENTROIDS_DISTANCE \\\n", - "251 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.534540549592... \n", - "300 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.437379245910... \n", - "414 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.482813493921... \n", - "493 9 [{'CENTROID_ID': 9, 'DISTANCE': 0.561752335987... \n", - "545 9 [{'CENTROID_ID': 9, 'DISTANCE': 0.540487926907... \n", + " CENTROID_ID NEAREST_CENTROIDS_DISTANCE \\\n", + "182250 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.570560301900... \n", + "3023485 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.523572693768... \n", + "407254 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.515173566816... \n", + "1509454 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.645342721754... \n", + "2357848 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.519872186251... \n", "\n", - " text_embedding \\\n", - "251 [ 2.20562406e-02 -3.51827666e-02 7.63384486e-... \n", - "300 [ 0.01977486 -0.04289974 -0.05289588 -0.027267... \n", - "414 [ 1.37719307e-02 -4.15441953e-02 -7.81692266e-... \n", - "493 [ 4.48844060e-02 -1.40293539e-02 -3.46709713e-... \n", - "545 [ 1.82510037e-02 -1.27867460e-02 -1.57095697e-... \n", + " text_embedding \\\n", + "182250 [ 4.70298417e-02 -4.08669300e-02 -2.99868709e-... \n", + "3023485 [ 1.55437263e-02 -1.93240177e-02 -2.48466972e-... \n", + "407254 [-0.01293471 -0.01959546 -0.02238463 -0.066214... \n", + "1509454 [ 3.21860723e-02 -2.67103072e-02 -4.78175096e-... \n", + "2357848 [-1.88122243e-02 -2.68064123e-02 -4.69480827e-... 
\n", "\n", - " statistics ml_embed_text_status \\\n", - "251 {\"token_count\":145,\"truncated\":false} \n", - "300 {\"token_count\":498,\"truncated\":false} \n", - "414 {\"token_count\":263,\"truncated\":false} \n", - "493 {\"token_count\":395,\"truncated\":false} \n", - "545 {\"token_count\":178,\"truncated\":false} \n", + " statistics ml_embed_text_status \\\n", + "182250 {\"token_count\":10,\"truncated\":false} \n", + "3023485 {\"token_count\":10,\"truncated\":false} \n", + "407254 {\"token_count\":10,\"truncated\":false} \n", + "1509454 {\"token_count\":10,\"truncated\":false} \n", + "2357848 {\"token_count\":10,\"truncated\":false} \n", "\n", - " content \n", - "251 A purse was purchased from XXXX XXXX on XX/XX/... \n", - "300 XXXX XXXX XXXXXXXX has reported on my credit r... \n", - "414 I have tried to dispute US BKPT CT TX XXXXXXXX... \n", - "493 Discover Student Loan has been holding onto {$... \n", - "545 My payments have been approximately {$89.00} w... \n", - "\n", - "[5 rows x 6 columns]" + " content \n", + "182250 These are not my accounts. Please remove them. \n", + "3023485 This debt is not mine due to identity theft. \n", + "407254 I do not owe this company money!!!!! \n", + "1509454 VIOLATES HIPPA AND CRA \n", + "2357848 Receive numerous phone calls. I have no debt. 
" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -886,7 +1391,7 @@ "clustered_result = cluster_model.predict(predicted_embeddings)\n", "# Notice the CENTROID_ID column, which is the ID number of the group that\n", "# each complaint belongs to.\n", - "clustered_result.head(n=5)" + "clustered_result.peek(n=5)" ] }, { @@ -904,7 +1409,7 @@ "id": "21rNsFMHo8hO" }, "source": [ - "## Step 3: Use PaLM2 LLM model to summarize complaint clusters" + "## Step 3: Use Gemini to summarize complaint clusters" ] }, { @@ -917,7 +1422,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "id": "2E7wXM_jGqo6" }, @@ -925,7 +1430,7 @@ { "data": { "text/html": [ - "Query job 84f95981-01c7-49ca-a10c-5842f07d867f is DONE. 10.6 MB processed. Open Job" + "Query job 85ead687-4ba9-44bf-88da-23a066f45960 is DONE. 10.7 MB processed. Open Job" ], "text/plain": [ "" @@ -937,7 +1442,7 @@ { "data": { "text/html": [ - "Query job 0872869a-94f0-4c3f-9f92-da7272f95cd0 is DONE. 10.6 MB processed. Open Job" + "Query job 68ef20cd-220d-40a9-bb42-63ed3d6f5d3f is DONE. 10.7 MB processed. Open Job" ], "text/plain": [ "" @@ -963,7 +1468,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "id": "ZNDiueI9IP5e" }, @@ -973,48 +1478,42 @@ "output_type": "stream", "text": [ "comment list 1:\n", - "1. 
XXXX XXXX XXXXXXXX has reported on my credit report for XXXX that I had 3 payments 30 or more days past due this information was and is incorrect i have sent numerous letters and responses to them asking for information regarding this reporting the bank continues to send me responces stating that my credit report is accurat but no documentation supporting the reporting of negative payments i have sent the XXXX XXXX XXXX numerous document showing past mistake of my payment that were misapplied by thier associates yet they still will not correct my credit report the XXXX XXXX XXXX has the wrong infor mation on my payment, balance last payments amount made & ect i have recently sent them a letter ( This is the exact letter ) and other information im having problems with the problem im having Date BLANK BLANK BLANK Complaint Dispute on credit report reporting XXXX XXXX XXXX Address Mail XXXX fl XXXX XXXX XXXX XXXX FL Zip Code XXXX Regarding Account Number ( Blank ) & Account Number ( Blank ) I ( blank ) on this XXXX Day of XXXX XXXX do hereby request a complete copy of my payment history from XXXX XXXX XXXX to XXXX XXXX XXXX this request is to include dates of all payments that was made on these accounts dates of all payments that were past due on these accounts and dates payments was paid to these account s after due date. I ( Blank ) am also requesting all letters and correspondence advising me / making me aware of my past due payments I also request all payments showing my account were paid 30 or more days late I ( Blank ) also request all payment dates and how payment was made ( Ex XXXX XXXX XXXX XXXXXXXX. XXXX Please Mail this information ASAP\n", - "2. I have tried to dispute US BKPT CT TX XXXXXXXX XXXX XXXX Account number XXXX on my credit report several times as inaccurate ( XX/XX/XXXX XX/XX/XXXX XX/XX/XXXX and XX/XX/XXXX ) as not mine but the credit bureaus have verified that it is accurate. 
I mailed a letter to the county records office ( XX/XX/XXXX ) pertaining to US BKPT CT TX XXXX Account number XXXX where this occurred and they replied on XX/XX/XXXX stating that they do not send information to the credit bureaus. This is where I am confused because after reading the law the FCRA if the original creditor or institution can not verify the information then the credit bureaus reporting it have to delete it. I sent this into the credit bureaus as well along with the paperwork from the county record office and it seems like they are not doing ANY sort of investigation which they are required to\n", - "3. Unauthorized hard inquires on my credit report and bureau refuses to remove them after they placed them on my credit report with my authorization XXXX Inquiry XX/XX/XXXX XXXX XXXX Inquiry XX/XX/XXXX XXXX XXXX inquiry XX/XX/XXXX and XX/XX/XXXX XXXX XX/XX/XXXX XXXX\n", - "4. XXXX - XX/XX/2020 Equifax Hello, I have reviewed a copy of my current credit report and it shows the inquiries above are fraudulent and I have sent a copy of FTC report to request removal. I have not been successful and need these removed due to attempting to purchase a home.\n", - "5. I have tried to remedy the issue that XXXX XXXX caused as XXXX XXXX indicated to me that Equifax could not remove a discharge debt from my report because of two social security numbers. In fact it was not two ssn 's but rather my deceased husbands DOB XXXX was on my report. XXXX XXXX is notorious for reporting incorrect data so i am going to get this resolved through CEPB. Attached please find the bankruptcy discharge notification and a copy of the requested drivers license with my DOB, XXXX. I expect that the debt for XXXX placed by XXXX XXXX to be removed as all the requested documentation is included in the correspondence.\n", + "1. Wrongs information, selling my information to third party. Incorrect reporting\n", + "2. 
I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL FROM XXXX XXXX XXXX XXXX WHICH ENDED A A LIE. THEY ALSO, PULLED MANY OTHERS I DID NT AGREED TO. SOLF PULLS ONLY\n", + "3. XX/XX/XXXX and XX/XX/XXXX inaccurate information reported 30 days late.\n", + "4. Im working on myCredit and I see a couple of inquiries that I have no idea where they came from.\n", + "5. I request a copy of all dispute results and documentary evidence from XXXX, and XXXX mailed to me\n", "\n", "comment list 2:\n", - "1. A purse was purchased from XXXX XXXX on XX/XX/2021. As they stated my package was delivered and signed for. Indeed it was, I also still have it. My package was damaged and only XXXX can file a claim with XXXX. I have tried to contact XXXX and was told to contact the shipper. I have contacted XXXX and was told that a chargeback was filed with TCF so there is nothing that they can do. I have provided all of these documents to you all including a police report. I will continue to escalate this if needed.\n", - "2. On Saturday XX/XX/XXXX I submitted a dispute for several fraudulent transactions with chime. I had stopped using chime back in XXXX of 2022 because I believed they were scamming me because of the money that was being taken out of my account without my knowledge and i had also had my phone stolen at work with my chime card and ssi & ID in the back of my phone case. I told them I believed thats how i was scammed for all of those fraudulent transactions that I disputed and also that money from my job had still been posting in my chime account after i had stopped using it because i was unable to change my direct deposit information with my job because i couldnt access my chime account anymore so I didnt have access to my routing and account number but i still had my card connected to my apple pay which I eventually stopped using because i was getting paid but most time my card on apple pay was declined because i was still being scammed out my money. 
Ive tried ordering new cards several time before I completely stopped using chime however the same thing was happening with each card and chime couldnt figure out how to stop it. \n", - "A few days ago i tried to create a new chime bank account because i had completely forgotten about my old scammed chime account but that app prompted me to sign into my old account Chime told me that they would do a complete investigation and let me know of the results. \n", - "However, on Sunday XX/XX/XXXX at XXXX i got an email from chime stating Hello XXXX XXXX XXXX This letter is to inform you that we have made a final determination regarding the claim referenced above. \n", - "Based on our investigation, we have concluded no error occurred. Therefore, no funds will be credited to your account and this claim is considered closed. \n", + "1. My wife and I have been sending money to XXXX via my brother-in-law to finish a building project we have been working on since XXXX with target date of completion by XX/XX/XXXX. In XXXX XXXX my brother-in-law in was contacted by his bank to confirm he was not defrauding my wife. My brother-in-law confirmed he was helping to handle the building project by organizing and paying the workers. In XXXX XXXX Bank of America reach out to my wife to update her profile to avoid account restrictions. My wife 's account was eventually restricted until she called and confirmed her employment and other personal information. My wife 's full account activities were then restored and we continued sending wire transfers to XXXX via her checking account. \n", + "Then I received a letter dated XXXX XXXX XXXX from Bank of America stating the money market account I share with my wife which has been opened since XXXX will be will be restricted from use in 21 days and closed in 30 days with no reason. I strongly believe this is a result of the legal international wires because there was no reason to close the Savings account which had with hardly any activity. 
\n", + "I agree that Bank of America has a right to close accounts but I do not agree with Bank of America closing accounts because of international transactions unless they can prove fraud, criminal activity or support for terrorism, this is discriminatory towards foreign nationals. How are foreign nationals suppose to make investments or support their family/community if they are excluded from the banking system?\n", + "2. XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consumer Financial Protection Bureau XXXX XXXX XXXX XXXX, IA XXXX Dear Sir or Madam : In XX/XX/XXXX Out of the blue JP Morgan Chase arbitrarily closed my account. This was after my mother is a XXXX survivor who is over XXXX years old and for whom I have a general power of attorney and take care of her bill paying was questioned about a transaction. She is also XXXX XXXX. \n", "\n", - "I emailed them back for the documentation on how they did the dispute and verified the information was inaccurate. I also asked how is it possible that a investigation for 20+ transactions had been completed in less than 1 day. \n", - "I truly believe chime did not do any investigation at all and that they lied about my investigation/dispute They have been unable to provide me with the documents and dispute information. In fact, they said id have to wait 10+ business days to receive that information in mail because they arent going to email it to me I found this very unfortunate. \n", + "I have reason to belief that a mentally disturbed family member for whom I have an order of protection initiated this situation. This individual has ben harassing me and other members of my family for a considerable amount of time. \n", "\n", - "Here are my reference numbers for my disputes XXXX XXXX\n", - "3. My sister and I attempted to close our aging father 's professional account at Wells Fargo. My sister worked closely with the local branch and followed their directives. 
But, even after 6 months the bank continued to reject the paperwork presented. Anticipating our father 's further decline, we hired an estate attorney to help us close the account. Even after appointing me as an officer of my dad 's company and presenting new documentation which followed the bank 's directive, the bank again refused to move the money to my mother and to close the account. The entire time the balance of the account was depleted through bank fees from around {$1800.00} to around {$1000.00}. In the end, Wells Fargo refunded the entire original balance of around {$1800.00} plus some a \" consumer satisfaction credit '' and transferred the funds to my mother 's account.\n", - "4. On XXXX XX/XX/2020 a sale of three Tahitian pearl necklaces was made by my online shop. This amounted to approximately {$2800.00}. Some of the funds were withdrawn by me leaving a total of {$220000.00} in the Paypal account. \n", + "The bank initially was satisfied with her response. However within 2 days they closed the account of a XXXX year old XXXX XXXX person. \n", "\n", - "Suddenly the balance was frozen for 180 days by Paypal. A telephone call to their customer service on about XXXX XXXX resulted in a rude and abrupt lady telling me that Paypal was parting ways with me. I was given no reasonable explanation why the account was limited or the funds frozen other than to infer that I was busy with fraudulent activity. The rude person refused to give me more details. \n", + "Soon after for no reason my account was closed as well. I tried to reach out to the corporate offices of Chase and make great effort to find out what happened and to restore my account as well as my mothers but I was unsuccessful. In addition the people I spoke to were not only unhelpful bu exceedingly rude. \n", "\n", - "I submitted all the details/documents they requested from then on. My appeals were refused, On XXXX XX/XX/2020 the 180 frozen days expired. 
I was requested by Paypal to submit all my personal details/documents again. I did. I have the XXXX XXXX XXXX XXXX ID Document. It is perfectly legal and valid in XXXX XXXX as is the new XXXX XXXXXXXX which is now being issued. But Paypal is not accepting my ID Document. So my funds are not being released.They send computer generated emails which offer no explanation as to why my ID document is not being accepted or any solution to the problem. \n", + "I should add that I have had an account with Chase since XX/XX/XXXX and took care of my ailing father before he passed away for over 25 years as well. I am now taking care of my mother for over 28 years. \n", "\n", - "This matter is most frustrating and I'm sure not in line with any financial practice.\n", - "5. On XX/XX/XXXX I received a text from Chase showing -- -- -- -- - Chase Fraud : Did you attempt a {$1700.00} withdrawal on XX/XX/XXXX with card XXXX? \n", - "replay yes or no. Msg & data rates may apply. \n", - "-- -- -- -- - Then I replied no, Then chase sent a text they will close my account, give them a call. \n", + "I went so far as contacting a prominent Television reporter who was interested in doing a report on what happened. \n", "\n", - "So I called chase to report, they are saying because it is pending transaction I have to wait until pending is gone, 2 days later they accepted my claim. \n", + "I have since managed to open an account at another bank but this week I had reason to go to a branch of Chase regarding another issue and a manager using my That is a very serious unsubstantiated accusation and given this information I have no choice but to submit this complaint. \n", "\n", - "Today XX/XX/XXXX they refused to credit my money {$1700.00} because my pin number and debit card were used. So I told them I never received card. \n", - "Still their answer is same. 
\n", + "I have no interest in having an account again at a disreputable bank like Chase but I can not and will not accept or tolerate a derogatory accusation be associated with my name. \n", "\n", - "Then I asked what should I do? file small claim against chase? go to police office? \n", - "They told me I can go to police office to file a claim. \n", + "I hope that my complaint will hAve the desired effect of removing this derogatory unsubstantiated accusation be removed from my name. However. I will not let this unfair matter stand and Chase ought to know that I have already retained an attorney and will if necessary hold Chase responsible and liable all damage i have incurred now And in the future Enclosed, please find the letter from Chase stating that they were closing my mothers account and a similar letter was received by me too. \n", "\n", - "I will go to police office after my work. \n", + "Also please find a letter from her Doctor stating that she is XXXX XXXX. \n", "\n", - "Before I go to police office, I am asking your help about this situation. \n", + "Thank you. \n", "\n", - "Please help me.\n", + "XXXX XXXX\n", + "3. U.S. Bank sent two letters containing Visa Debit Cards to our address on XX/XX/2021. One Visa Debit Card is in the name of XXXX XXXX and one Visa Debit Card is in the name of XXXX XXXX. These cards supposedly link to existing checking accounts at U.S. Bank. However : ( 1 ) Neither of us have existing checking accounts at U.S. Bank, ( 2 ) Neither of us solicited a bank account at U.S. Bank, and ( 3 ) Neither of us solicited a Visa Debit Card. We have attempted to call U.S. Bank at the phone numbers provided in the letters but are only able to access an automated system which will not proceed without us establishing accounts and activating these cards. We are concerned here that one of two things has happened : either ( 1 ) we are victims of identity theft and some third party is trying to establish accounts in our name, or ( 2 ) U.S. 
Bank is engaged in bank fraud. In either case, we request the assistance of the Consumer Financial Protection Bureau. Thank you.\n", + "4. I contacted my bank over 3 times about this amount, the first two times I spoke to gentleman that agreed with me that I didnt get back a certain amount of dollars back, I did the math and they refuse to see that I do not owe this amount because I never had it in the first place. I wrote out all my charges and connected it to the charges made back from the consumer and I was missing XXXX, I called XXXX they said they gave it all back which is not their fault because they showed me proof. Along the lines Capital One does not want to take responsibility for the missing money. I have wrote everything out and then its not adding up, they keep saying that they did a charge back twice which is incorrect. My balance was at XXXX before I made this purchase and it shouldve been returned back to XXXX because I return all the items and nothing is in my possession. I have proof that I returned everything.\n", + "5. CB INDIGO ( Bank ) XX/XX/2022 I just recently got off the phone with the company and they wont put in a request of removal of a fraudulent hard inquiry from Insigo Mastercard to XXXX. They dont even have my information on file, I called 3 times most of them are lazy and was giving me a hard time.\n", "\n" ] } @@ -1037,7 +1536,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": { "id": "BfHGJLirzSvH" }, @@ -1048,47 +1547,41 @@ "text": [ "Please highlight the most obvious difference between the two lists of comments:\n", "comment list 1:\n", - "1. 
XXXX XXXX XXXXXXXX has reported on my credit report for XXXX that I had 3 payments 30 or more days past due this information was and is incorrect i have sent numerous letters and responses to them asking for information regarding this reporting the bank continues to send me responces stating that my credit report is accurat but no documentation supporting the reporting of negative payments i have sent the XXXX XXXX XXXX numerous document showing past mistake of my payment that were misapplied by thier associates yet they still will not correct my credit report the XXXX XXXX XXXX has the wrong infor mation on my payment, balance last payments amount made & ect i have recently sent them a letter ( This is the exact letter ) and other information im having problems with the problem im having Date BLANK BLANK BLANK Complaint Dispute on credit report reporting XXXX XXXX XXXX Address Mail XXXX fl XXXX XXXX XXXX XXXX FL Zip Code XXXX Regarding Account Number ( Blank ) & Account Number ( Blank ) I ( blank ) on this XXXX Day of XXXX XXXX do hereby request a complete copy of my payment history from XXXX XXXX XXXX to XXXX XXXX XXXX this request is to include dates of all payments that was made on these accounts dates of all payments that were past due on these accounts and dates payments was paid to these account s after due date. I ( Blank ) am also requesting all letters and correspondence advising me / making me aware of my past due payments I also request all payments showing my account were paid 30 or more days late I ( Blank ) also request all payment dates and how payment was made ( Ex XXXX XXXX XXXX XXXXXXXX. XXXX Please Mail this information ASAP\n", - "2. I have tried to dispute US BKPT CT TX XXXXXXXX XXXX XXXX Account number XXXX on my credit report several times as inaccurate ( XX/XX/XXXX XX/XX/XXXX XX/XX/XXXX and XX/XX/XXXX ) as not mine but the credit bureaus have verified that it is accurate. 
I mailed a letter to the county records office ( XX/XX/XXXX ) pertaining to US BKPT CT TX XXXX Account number XXXX where this occurred and they replied on XX/XX/XXXX stating that they do not send information to the credit bureaus. This is where I am confused because after reading the law the FCRA if the original creditor or institution can not verify the information then the credit bureaus reporting it have to delete it. I sent this into the credit bureaus as well along with the paperwork from the county record office and it seems like they are not doing ANY sort of investigation which they are required to\n", - "3. Unauthorized hard inquires on my credit report and bureau refuses to remove them after they placed them on my credit report with my authorization XXXX Inquiry XX/XX/XXXX XXXX XXXX Inquiry XX/XX/XXXX XXXX XXXX inquiry XX/XX/XXXX and XX/XX/XXXX XXXX XX/XX/XXXX XXXX\n", - "4. XXXX - XX/XX/2020 Equifax Hello, I have reviewed a copy of my current credit report and it shows the inquiries above are fraudulent and I have sent a copy of FTC report to request removal. I have not been successful and need these removed due to attempting to purchase a home.\n", - "5. I have tried to remedy the issue that XXXX XXXX caused as XXXX XXXX indicated to me that Equifax could not remove a discharge debt from my report because of two social security numbers. In fact it was not two ssn 's but rather my deceased husbands DOB XXXX was on my report. XXXX XXXX is notorious for reporting incorrect data so i am going to get this resolved through CEPB. Attached please find the bankruptcy discharge notification and a copy of the requested drivers license with my DOB, XXXX. I expect that the debt for XXXX placed by XXXX XXXX to be removed as all the requested documentation is included in the correspondence.\n", + "1. Wrongs information, selling my information to third party. Incorrect reporting\n", + "2. 
I TIED TO BUY CAR AT XXXX, THEY GOT APPROVAL FROM XXXX XXXX XXXX XXXX WHICH ENDED A A LIE. THEY ALSO, PULLED MANY OTHERS I DID NT AGREED TO. SOLF PULLS ONLY\n", + "3. XX/XX/XXXX and XX/XX/XXXX inaccurate information reported 30 days late.\n", + "4. Im working on myCredit and I see a couple of inquiries that I have no idea where they came from.\n", + "5. I request a copy of all dispute results and documentary evidence from XXXX, and XXXX mailed to me\n", "comment list 2:\n", - "1. A purse was purchased from XXXX XXXX on XX/XX/2021. As they stated my package was delivered and signed for. Indeed it was, I also still have it. My package was damaged and only XXXX can file a claim with XXXX. I have tried to contact XXXX and was told to contact the shipper. I have contacted XXXX and was told that a chargeback was filed with TCF so there is nothing that they can do. I have provided all of these documents to you all including a police report. I will continue to escalate this if needed.\n", - "2. On Saturday XX/XX/XXXX I submitted a dispute for several fraudulent transactions with chime. I had stopped using chime back in XXXX of 2022 because I believed they were scamming me because of the money that was being taken out of my account without my knowledge and i had also had my phone stolen at work with my chime card and ssi & ID in the back of my phone case. I told them I believed thats how i was scammed for all of those fraudulent transactions that I disputed and also that money from my job had still been posting in my chime account after i had stopped using it because i was unable to change my direct deposit information with my job because i couldnt access my chime account anymore so I didnt have access to my routing and account number but i still had my card connected to my apple pay which I eventually stopped using because i was getting paid but most time my card on apple pay was declined because i was still being scammed out my money. 
Ive tried ordering new cards several time before I completely stopped using chime however the same thing was happening with each card and chime couldnt figure out how to stop it. \n", - "A few days ago i tried to create a new chime bank account because i had completely forgotten about my old scammed chime account but that app prompted me to sign into my old account Chime told me that they would do a complete investigation and let me know of the results. \n", - "However, on Sunday XX/XX/XXXX at XXXX i got an email from chime stating Hello XXXX XXXX XXXX This letter is to inform you that we have made a final determination regarding the claim referenced above. \n", - "Based on our investigation, we have concluded no error occurred. Therefore, no funds will be credited to your account and this claim is considered closed. \n", + "1. My wife and I have been sending money to XXXX via my brother-in-law to finish a building project we have been working on since XXXX with target date of completion by XX/XX/XXXX. In XXXX XXXX my brother-in-law in was contacted by his bank to confirm he was not defrauding my wife. My brother-in-law confirmed he was helping to handle the building project by organizing and paying the workers. In XXXX XXXX Bank of America reach out to my wife to update her profile to avoid account restrictions. My wife 's account was eventually restricted until she called and confirmed her employment and other personal information. My wife 's full account activities were then restored and we continued sending wire transfers to XXXX via her checking account. \n", + "Then I received a letter dated XXXX XXXX XXXX from Bank of America stating the money market account I share with my wife which has been opened since XXXX will be will be restricted from use in 21 days and closed in 30 days with no reason. I strongly believe this is a result of the legal international wires because there was no reason to close the Savings account which had with hardly any activity. 
\n", + "I agree that Bank of America has a right to close accounts but I do not agree with Bank of America closing accounts because of international transactions unless they can prove fraud, criminal activity or support for terrorism, this is discriminatory towards foreign nationals. How are foreign nationals suppose to make investments or support their family/community if they are excluded from the banking system?\n", + "2. XXXX XXXX XXXX XXXX, NY XXXX XX/XX/XXXX Consumer Financial Protection Bureau XXXX XXXX XXXX XXXX, IA XXXX Dear Sir or Madam : In XX/XX/XXXX Out of the blue JP Morgan Chase arbitrarily closed my account. This was after my mother is a XXXX survivor who is over XXXX years old and for whom I have a general power of attorney and take care of her bill paying was questioned about a transaction. She is also XXXX XXXX. \n", "\n", - "I emailed them back for the documentation on how they did the dispute and verified the information was inaccurate. I also asked how is it possible that a investigation for 20+ transactions had been completed in less than 1 day. \n", - "I truly believe chime did not do any investigation at all and that they lied about my investigation/dispute They have been unable to provide me with the documents and dispute information. In fact, they said id have to wait 10+ business days to receive that information in mail because they arent going to email it to me I found this very unfortunate. \n", + "I have reason to belief that a mentally disturbed family member for whom I have an order of protection initiated this situation. This individual has ben harassing me and other members of my family for a considerable amount of time. \n", "\n", - "Here are my reference numbers for my disputes XXXX XXXX\n", - "3. My sister and I attempted to close our aging father 's professional account at Wells Fargo. My sister worked closely with the local branch and followed their directives. 
But, even after 6 months the bank continued to reject the paperwork presented. Anticipating our father 's further decline, we hired an estate attorney to help us close the account. Even after appointing me as an officer of my dad 's company and presenting new documentation which followed the bank 's directive, the bank again refused to move the money to my mother and to close the account. The entire time the balance of the account was depleted through bank fees from around {$1800.00} to around {$1000.00}. In the end, Wells Fargo refunded the entire original balance of around {$1800.00} plus some a \" consumer satisfaction credit '' and transferred the funds to my mother 's account.\n", - "4. On XXXX XX/XX/2020 a sale of three Tahitian pearl necklaces was made by my online shop. This amounted to approximately {$2800.00}. Some of the funds were withdrawn by me leaving a total of {$220000.00} in the Paypal account. \n", + "The bank initially was satisfied with her response. However within 2 days they closed the account of a XXXX year old XXXX XXXX person. \n", "\n", - "Suddenly the balance was frozen for 180 days by Paypal. A telephone call to their customer service on about XXXX XXXX resulted in a rude and abrupt lady telling me that Paypal was parting ways with me. I was given no reasonable explanation why the account was limited or the funds frozen other than to infer that I was busy with fraudulent activity. The rude person refused to give me more details. \n", + "Soon after for no reason my account was closed as well. I tried to reach out to the corporate offices of Chase and make great effort to find out what happened and to restore my account as well as my mothers but I was unsuccessful. In addition the people I spoke to were not only unhelpful bu exceedingly rude. \n", "\n", - "I submitted all the details/documents they requested from then on. My appeals were refused, On XXXX XX/XX/2020 the 180 frozen days expired. 
I was requested by Paypal to submit all my personal details/documents again. I did. I have the XXXX XXXX XXXX XXXX ID Document. It is perfectly legal and valid in XXXX XXXX as is the new XXXX XXXXXXXX which is now being issued. But Paypal is not accepting my ID Document. So my funds are not being released.They send computer generated emails which offer no explanation as to why my ID document is not being accepted or any solution to the problem. \n", + "I should add that I have had an account with Chase since XX/XX/XXXX and took care of my ailing father before he passed away for over 25 years as well. I am now taking care of my mother for over 28 years. \n", "\n", - "This matter is most frustrating and I'm sure not in line with any financial practice.\n", - "5. On XX/XX/XXXX I received a text from Chase showing -- -- -- -- - Chase Fraud : Did you attempt a {$1700.00} withdrawal on XX/XX/XXXX with card XXXX? \n", - "replay yes or no. Msg & data rates may apply. \n", - "-- -- -- -- - Then I replied no, Then chase sent a text they will close my account, give them a call. \n", + "I went so far as contacting a prominent Television reporter who was interested in doing a report on what happened. \n", "\n", - "So I called chase to report, they are saying because it is pending transaction I have to wait until pending is gone, 2 days later they accepted my claim. \n", + "I have since managed to open an account at another bank but this week I had reason to go to a branch of Chase regarding another issue and a manager using my That is a very serious unsubstantiated accusation and given this information I have no choice but to submit this complaint. \n", "\n", - "Today XX/XX/XXXX they refused to credit my money {$1700.00} because my pin number and debit card were used. So I told them I never received card. \n", - "Still their answer is same. 
\n", + "I have no interest in having an account again at a disreputable bank like Chase but I can not and will not accept or tolerate a derogatory accusation be associated with my name. \n", "\n", - "Then I asked what should I do? file small claim against chase? go to police office? \n", - "They told me I can go to police office to file a claim. \n", + "I hope that my complaint will hAve the desired effect of removing this derogatory unsubstantiated accusation be removed from my name. However. I will not let this unfair matter stand and Chase ought to know that I have already retained an attorney and will if necessary hold Chase responsible and liable all damage i have incurred now And in the future Enclosed, please find the letter from Chase stating that they were closing my mothers account and a similar letter was received by me too. \n", "\n", - "I will go to police office after my work. \n", + "Also please find a letter from her Doctor stating that she is XXXX XXXX. \n", "\n", - "Before I go to police office, I am asking your help about this situation. \n", + "Thank you. \n", "\n", - "Please help me.\n", + "XXXX XXXX\n", + "3. U.S. Bank sent two letters containing Visa Debit Cards to our address on XX/XX/2021. One Visa Debit Card is in the name of XXXX XXXX and one Visa Debit Card is in the name of XXXX XXXX. These cards supposedly link to existing checking accounts at U.S. Bank. However : ( 1 ) Neither of us have existing checking accounts at U.S. Bank, ( 2 ) Neither of us solicited a bank account at U.S. Bank, and ( 3 ) Neither of us solicited a Visa Debit Card. We have attempted to call U.S. Bank at the phone numbers provided in the letters but are only able to access an automated system which will not proceed without us establishing accounts and activating these cards. We are concerned here that one of two things has happened : either ( 1 ) we are victims of identity theft and some third party is trying to establish accounts in our name, or ( 2 ) U.S. 
Bank is engaged in bank fraud. In either case, we request the assistance of the Consumer Financial Protection Bureau. Thank you.\n", + "4. I contacted my bank over 3 times about this amount, the first two times I spoke to gentleman that agreed with me that I didnt get back a certain amount of dollars back, I did the math and they refuse to see that I do not owe this amount because I never had it in the first place. I wrote out all my charges and connected it to the charges made back from the consumer and I was missing XXXX, I called XXXX they said they gave it all back which is not their fault because they showed me proof. Along the lines Capital One does not want to take responsibility for the missing money. I have wrote everything out and then its not adding up, they keep saying that they did a charge back twice which is incorrect. My balance was at XXXX before I made this purchase and it shouldve been returned back to XXXX because I return all the items and nothing is in my possession. I have proof that I returned everything.\n", + "5. CB INDIGO ( Bank ) XX/XX/2022 I just recently got off the phone with the company and they wont put in a request of removal of a fraudulent hard inquiry from Insigo Mastercard to XXXX. They dont even have my information on file, I called 3 times most of them are lazy and was giving me a hard time.\n", "\n" ] } @@ -1107,12 +1600,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Get a response from PaLM 2 LLM by making a call to Vertex AI using our connection." + "Get a response from Gemini by making a call to Vertex AI using our connection." ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "id": "mL5P0_3X04dE" }, @@ -1120,7 +1613,7 @@ { "data": { "text/html": [ - "Query job d3965d90-8af9-46cb-9129-40e1d2866efe is DONE. 0 Bytes processed. Open Job" + "Query job a7ce86a7-3a18-47b9-a46f-98dbe6a5a339 is DONE. 0 Bytes processed. 
Open Job" ], "text/plain": [ "" @@ -1138,19 +1631,19 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": { "id": "ICWHsqAW1FNk" }, "outputs": [], "source": [ - "# Make a DataFrame containing only a single row with our prompt for PaLM 2\n", + "# Make a DataFrame containing only a single row with our prompt for Gemini\n", "df = bf.DataFrame({\"prompt\": [prompt]})" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": { "id": "gB7e1LXU1pst" }, @@ -1158,7 +1651,7 @@ { "data": { "text/html": [ - "Query job 29a26018-027a-4c70-a795-841b5ace87d6 is DONE. 0 Bytes processed. Open Job" + "Query job d568c03d-6bbd-4c3e-b087-563b7f5135ed is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1170,7 +1663,7 @@ { "data": { "text/html": [ - "Query job 3abcc8cd-fa9f-4a93-b6be-6e22c8cdaceb is DONE. 8 Bytes processed. Open Job" + "Query job 17eaa806-51a4-4ee9-b219-75455d0095a7 is DONE. 8 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1182,7 +1675,7 @@ { "data": { "text/html": [ - "Query job ab5dfd41-98cb-4f24-a9c7-11399fcb2e47 is DONE. 2 Bytes processed. Open Job" + "Query job e6d40ded-691d-4523-94ea-dd8202bd0220 is DONE. 2 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1194,7 +1687,7 @@ { "data": { "text/html": [ - "Query job c6c7bce4-81a0-4c4d-a515-2d5dfffc08a2 is DONE. 299 Bytes processed. Open Job" + "Query job 200f0b88-7b6d-417b-a181-a98138e3bc95 is DONE. 193 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1206,16 +1699,16 @@ { "data": { "text/plain": [ - "'The most obvious difference between the two lists of comments is their subject matter. 
Comment list 1 primarily deals with issues related to credit reporting and identity theft, while comment list 2 focuses on issues related to bank accounts, fraudulent transactions, and customer service.'" + "'The most obvious difference between the two lists of comments is that list 1 is related to credit reporting disputes and list 2 is a collection of general consumer banking complaints.'" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Send the request for PaLM 2 to generate a response to our prompt\n", + "# Send the request for Gemini to generate a response to our prompt\n", "major_difference = q_a_model.predict(df)\n", "# PaLM 2's response is the only row in the dataframe result \n", "major_difference[\"ml_generate_text_llm_result\"].iloc[0]" diff --git a/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb b/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb index 87b8f9c0b6..40efe9d18c 100644 --- a/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb +++ b/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb @@ -325,7 +325,7 @@ "id": "8GvJAgnH5Nzi" }, "source": [ - "BigQuery DataFrames implements some of the interface required by matplotlib. This means we can pass our DataFrame right into `pyplot.plt` and using the default settings, matplotlib will draw a simple line graph for us." + "BigQuery DataFrames implements some plotting methods with the matplotlib backend. Use `DataFrame.plot.line()` to draw a simple line graph." ] }, { @@ -338,7 +338,7 @@ { "data": { "text/html": [ - "Query job 38e28079-9a84-4c28-a04c-cdc0afbb74b1 is DONE. 273.1 MB processed. Open Job" + "Query job 307ec006-490f-435d-b3e3-74eb1d73fe0f is DONE. 372.9 MB processed. Open Job" ], "text/plain": [ "" @@ -349,19 +349,17 @@ }, { "data": { - "text/html": [ - "Query job b1df794f-6d3f-4f05-8bcd-2da29f4eb402 is DONE. 372.9 MB processed. 
Open Job" - ], "text/plain": [ - "" + "" ] }, + "execution_count": 9, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -371,17 +369,10 @@ } ], "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "# matplotlin will draw a line graph by default\n", - "plt.plot(new_cases_usa)\n", - "# Rotate the labels on the x axis so that they don't overlap\n", - "plt.xticks(rotation=45)\n", - "# label the y axis for clarity\n", - "plt.ylabel(\"New Cases\")\n", - "\n", - "# Show the plot\n", - "plt.show()" + "new_cases_usa.plot.line(\n", + " rot=45,\n", + " ylabel=\"New Cases\",\n", + ")" ] }, { @@ -511,7 +502,7 @@ { "data": { "text/html": [ - "Query job 120a989f-4ce0-47e9-b051-a1a570ecd0e3 is DONE. 12.6 GB processed. Open Job" + "Query job 44159a16-cab9-4ffa-be68-2228387a48c2 is DONE. 12.6 GB processed. Open Job" ], "text/plain": [ "" @@ -565,7 +556,7 @@ }, { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAG1CAYAAAAMU3WaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAACFAElEQVR4nO3deXxU5dUH8N+9s08mM9lXEpawL4EEZFMWFQWkyubr2qp1t1SraK1Yl9LagtVW3EqtWrS+InUBwQ1QfEEFBCWEfUtAIPs++3rv8/5xM0OGmYRkMsnMJOf7+dCSuTczzwzj3DPPc55zOMYYAyGEEEJIjOIjPQBCCCGEkM6gYIYQQgghMY2CGUIIIYTENApmCCGEEBLTKJghhBBCSEyjYIYQQgghMY2CGUIIIYTENApmCCGEEBLTKJghhBBCSEyjYIYQQgghMS2qgpnly5eD4zg8+OCDvtscDgcWLVqE5ORk6HQ6LFy4ENXV1ZEbJCGEEEKiStQEMz/88ANee+015Ofn+93+0EMP4ZNPPsEHH3yAbdu2oaKiAgsWLIjQKAkhhBASbeSRHgAAWCwW3HzzzXj99dfxzDPP+G43Go148803sXr1alx22WUAgFWrVmHYsGH4/vvvMXHixAvetyiKqKioQHx8PDiO67LnQAghhJDwYYzBbDYjKysLPN/23EtUBDOLFi3CnDlzMGPGDL9gZs+ePXC73ZgxY4bvtqFDhyI3Nxc7d+4MGsw4nU44nU7fz+Xl5Rg+fHjXPgFCCCGEdImzZ8+iT58+bZ4T8WBmzZo1KCoqwg8//BBwrKqqCkqlEgkJCX63p6eno6qqKuj9LVu2DEuXLg24/ezZs9Dr9WEZMyGEEEK6lslkQk5ODuLj4y94bkSDmbNnz+I3v/kNvvzyS6jV6rDc55IlS7B48WLfz94XQ6/XUzBDCCGExJj2pIhENAF4z549qKmpQWFhIeRyOeRyObZt24aXXnoJcrkc6enpcLlcaGpq8vu96upqZGRkBL1PlUrlC1wogCGEEEJ6vojOzFx++eU4cOCA322//OUvMXToUPzud79DTk4OFAoFtmzZgoULFwIAjh07hjNnzmDSpEmRGDIhhBBCokxEg5n4+HiMHDnS77a4uDgkJyf7br/jjjuwePFiJCUlQa/X4/7778ekSZPatZOJEEIIIT1fxBOAL+SFF14Az/NYuHA
hnE4nZs6ciX/84x+RHhYhhBBCogTHGGORHkRXMplMMBgMMBqNlD9DCCGExIiOXL+jpgIwIYQQQkgoKJghhBBCSEyjYIYQQgghMY2CGUIIIYTEtKjfzURIVxFFhkMVJjTYXEjSKjEiSw+ep2akhBASayiYIb3SjpI6rNxWitIaC9wCg0LGIS9Nh/um5WHywJRID48QQkgH0DIT6XV2lNTh8XUHcKTShDiVHGnxKsSp5DhSacbj6w5gR0ldpIdICCGkAyiYIb2KKDKs3FYKi9ODDL0aaoUMPM9BrZAhQ6+CxSlg5bZSiGKPLr9ECCE9CgUzpFc5VGFCaY0FiVplQCdWjuOQoFWgtMaCQxWmCI2QEEJIR1EwQ3qVBpsLboFBKQv+1lfJeLhFhgabq5tHRgghJFQUzJBeJUmrhELGwSWIQY87BREKnkOSVtnNIyOEEBIqCmZIrzIiS4+8NB0abW6c35aMMYYmmxt5aTqMyKI+XoQQEisomCG9Cs9zuG9aHnQqGapMTtjdAkSRwe4WUGVyQqeS4b5peVRvhhBCYggFM6TXmTwwBX+ZPwrDMuNhc3pQY3HC5vRgWGY8/jJ/FNWZIYSQGENF80ivNHlgCiYOSKYKwIQQ0gNQMEN6LZ7nMKqPIdLDIIQQ0km0zEQIIYSQmEbBDCGEEEJiGgUzhBBCCIlpFMwQQgghJKZRMEMIIYSQmEbBDCGEEEJiGgUzhBBCCIlpFMwQQgghJKZRMEMIIYSQmEbBDCGEEEJiGgUzhBBCCIlpFMwQQgghJKZRMEMIIYSQmEbBDCGEEEJiGgUzhBBCCIlpEQ1mVq5cifz8fOj1euj1ekyaNAlffPGF7/j06dPBcZzfn3vvvTeCIyaEEEJItJFH8sH79OmD5cuXY9CgQWCM4e2338bcuXOxd+9ejBgxAgBw11134Y9//KPvd7RabaSGSwghhJAoFNFg5uqrr/b7+c9//jNWrlyJ77//3hfMaLVaZGRktPs+nU4nnE6n72eTyRSewRJCCCEkKkVNzowgCFizZg2sVismTZrku/3dd99FSkoKRo4ciSVLlsBms7V5P8uWLYPBYPD9ycnJ6eqhE0IIISSCOMYYi+QADhw4gEmTJsHhcECn02H16tW46qqrAAD/+te/0LdvX2RlZWH//v343e9+h/Hjx2Pt2rWt3l+wmZmcnBwYjUbo9foufz6EEEII6TyTyQSDwdCu63fEgxmXy4UzZ87AaDTiww8/xBtvvIFt27Zh+PDhAed+/fXXuPzyy1FSUoK8vLx23X9HXgxCCCGERIeOXL8jvsykVCoxcOBAjB07FsuWLcPo0aPx4osvBj13woQJAICSkpLuHCIhhBBColjEg5nziaLot0zUUnFxMQAgMzOzG0dECCGEkGgW0d1MS5YswezZs5Gbmwuz2YzVq1dj69at2LRpE0pLS335M8nJydi/fz8eeughTJ06Ffn5+ZEcNiGEEEKiSESDmZqaGtxyyy2orKyEwWBAfn4+Nm3ahCuuuAJnz57FV199hRUrVsBqtSInJwcLFy7EE088EckhE0IIISTKRDwBuKtRAjAhhBASe2IqAZgQQgghpDMomCGEEEJITKNghhBCCCExjYIZQgghhMQ0CmYIIYQQEtMomCGEEEJITKNghhBCCCExjYIZQgghhMQ0CmYIIYQQEtMomCGEEEJITKNghhBCCCExjYIZQgghhMQ0CmYIIYQQEtMomCGEEEJITKNghhBCCCExjYIZQgghhMQ0CmYIIYQQEtMomCGEEEJITKNghhBCCCExjYIZQgghhMQ0CmYIIYQQEtMomCGEEEJITKNghhBCCCExjYIZQgghhMQ0CmYIIYQQEtMomCGEEEJITKNghhBCCCExjYIZQgghhMQ0CmYIIYQQEtMomCGEEEJITItoMLNy5Urk5+dDr9dDr9dj0qRJ+OKLL3zHHQ4HFi1ahOTkZOh0OixcuBDV1dURHDEhhBBCok1Eg5k+ffpg+fLl2LN
nD3788UdcdtllmDt3Lg4dOgQAeOihh/DJJ5/ggw8+wLZt21BRUYEFCxZEcsiEEEIIiTIcY4xFehAtJSUl4bnnnsO1116L1NRUrF69Gtdeey0A4OjRoxg2bBh27tyJiRMntuv+TCYTDAYDjEYj9Hp9Vw6dEEIIIWHSket31OTMCIKANWvWwGq1YtKkSdizZw/cbjdmzJjhO2fo0KHIzc3Fzp07W70fp9MJk8nk94cQQgghPVfEg5kDBw5Ap9NBpVLh3nvvxbp16zB8+HBUVVVBqVQiISHB7/z09HRUVVW1en/Lli2DwWDw/cnJyeniZ0AIIYSQSIp4MDNkyBAUFxdj165duO+++3Drrbfi8OHDId/fkiVLYDQafX/Onj0bxtESQgghJNrIIz0ApVKJgQMHAgDGjh2LH374AS+++CKuv/56uFwuNDU1+c3OVFdXIyMjo9X7U6lUUKlUXT1sQgghhESJiM/MnE8URTidTowdOxYKhQJbtmzxHTt27BjOnDmDSZMmRXCEhBBCCIkmEZ2ZWbJkCWbPno3c3FyYzWasXr0aW7duxaZNm2AwGHDHHXdg8eLFSEpKgl6vx/33349Jkya1eycTIYQQQnq+iAYzNTU1uOWWW1BZWQmDwYD8/Hxs2rQJV1xxBQDghRdeAM/zWLhwIZxOJ2bOnIl//OMfkRwyIYQQQqJM1NWZCTeqM0MIIYTEnpisM0MIIYQQEgoKZgghhBAS0yiYIYQQQkhMo2CGEEIIITGNghlCCCGExDQKZgghhBAS0yiYIYQQQkhMo2CGEEIIITGNghlCCCGExDQKZgghhBAS0yiYIYQQQkhMo2CGEEIIITGNghlCCCGExDQKZgghhBAS0yiYIYQQQkhMo2CGEEIIITGNghlCCCGExDQKZgghhBAS0yiYIYQQQkhMo2CGEEIIITGNghlCCCGExDQKZgghhBAS0yiYIYQQQkhMo2CGEEIIITFNHsovCYKAt956C1u2bEFNTQ1EUfQ7/vXXX4dlcIQQQgghFxJSMPOb3/wGb731FubMmYORI0eC47hwj4sQQgghpF1CCmbWrFmD999/H1dddVW4x0MIIYQQ0iEh5cwolUoMHDgw3GMhhBBCCOmwkIKZhx9+GC+++CIYY+EeDyGEEEJigEcQUWdxosnmivRQ2r/MtGDBAr+fv/76a3zxxRcYMWIEFAqF37G1a9eGZ3SEEEIIiSqCyNBkc8Hk8IAxhkStMtJDan8wYzAY/H6eP39+2AdDSHcSRYZDFSY02FxI0ioxIksPnqdkdkIICUYUGYx2N4x2N8QoW5lpdzCzatWqsD/4smXLsHbtWhw9ehQajQaTJ0/Gs88+iyFDhvjOmT59OrZt2+b3e/fccw/++c9/hn08pPfYUVKHf2wtxbEqM1yCCKWMx5CMePxqeh4mD0yJ9PAIISRqMHYuiBHE6ApivELazRQu27Ztw6JFi3DRRRfB4/Hg8ccfx5VXXonDhw8jLi7Od95dd92FP/7xj76ftVptJIZLeogdJXV46P1iNFhdYIyBMYDjgF2nXDhRY8YL142hgIYQ0usxxmByeGC0ueE5r55ctAkpmCkoKAhaW4bjOKjVagwcOBC33XYbLr300jbvZ+PGjX4/v/XWW0hLS8OePXswdepU3+1arRYZGRmhDJUQP6LIsOyLI6g1OwEAMp4Dz3FgYBBEhlqzE8u+OIL1iy6hJSdCSK/EGIPZ6UGTNfqDGK+QdjPNmjULJ0+eRFxcHC699FJceuml0Ol0KC0txUUXXYTKykrMmDED69ev79D9Go1GAEBSUpLf7e+++y5SUlIwcuRILFmyBDabrdX7cDqdMJlMfn8I8TpQbsTxagu8y71ugcEliHAL0g2MAcerLThQbozgKAkhJDIsTg/KGu2oMztjJpABQpyZqaurw8MPP4wnn3zS7/ZnnnkGp0+fxubNm/H000/jT3/6E+bOnduu+xRFEQ8++CAuvvhijBw50nf7TTfdhL59+yI
rKwv79+/H7373Oxw7dqzVHVPLli3D0qVLQ3lapBcoPtMElyCCATg/f827FOwSRBSfacLonITuHh4hhESE1elBo80Flyd2ApiWOBZCsRiDwYA9e/YEFM4rKSnB2LFjYTQacfToUVx00UUwm83tus/77rsPX3zxBb777jv06dOn1fO+/vprXH755SgpKUFeXl7AcafTCafT6fvZZDIhJycHRqMRer2+nc+Q9FSrtp/C0k8OX/C8p68ejl9e3L8bRkQIIZFjdwlosLngdAsh30eiVonEuPBvzzaZTDAYDO26foc0M6NWq7Fjx46AYGbHjh1Qq9UApJkW798v5Ne//jU+/fRTfPPNN20GMgAwYcIEAGg1mFGpVFCpVO16XNL76FTte8u39zxCCIlFDreABqsLjk4EMdEkpE/s+++/H/feey/27NmDiy66CADwww8/4I033sDjjz8OANi0aRPGjBnT5v0wxnD//fdj3bp12Lp1K/r3v/A34eLiYgBAZmZmKEMnvZzV4QnreYQQEkscbgFNNjdsrp71GRdSMPPEE0+gf//+eOWVV/DOO+8AAIYMGYLXX38dN910EwDg3nvvxX333dfm/SxatAirV6/G+vXrER8fj6qqKgDSMpZGo0FpaSlWr16Nq666CsnJydi/fz8eeughTJ06Ffn5+aEMnfRyHMeB4wLzZfzPAXWCJ4T0KE6PFMRYnT0riPEKKWcmbA/eygVj1apVuO2223D27Fn8/Oc/x8GDB2G1WpGTk4P58+fjiSeeaHf+S0fW3EjPt+9sE657bSfcnuYk4BbHuOY/CjmP9++ZRAnAhJCY5xZENFpdsHRhEBOzOTPhcqE4KicnJ6D6LyGdMSrbgMHpOhwsN+H8dx+DFMwMTtdhVLYhyG8TQkhs8AgiGm1uWJyeXtEUOqRghuf5NqfhBaFnJBSRnofnOVwzOguHK0wQgvz3zXHANaOzqGAeISQmCSJDo80Fs6N3BDFeIQUz69at8/vZ7XZj7969ePvtt6nGC4lqosjwzYk6qOQ8HB4RLduM8BygksvwzYk63HHJAApoCCExQ2jRBLI3BTFeIQUzwQrhXXvttRgxYgT++9//4o477uj0wAjpCocqTDhcYYS7OYpRyDjf+pIgMrhFEYcrjDhUYcKoPrTURAiJbtHcybo7hdTOoDUTJ07Eli1bwnmXhIRVndUJk8MDkTEoZTzkPA958/8rZTzE5sZqdVbnhe+MEEIihDEGo82Ns402NNpcvTqQAcKYAGy32/HSSy8hOzs7XHdJSNg1Wd0QRQaeP9dc0ts1m+cAnuMgigxNVnekh0oIIQFiqZN1dwopmElMTPRLAGaMwWw2Q6vV4n//93/DNjhCwi1RqwDPc77GksEoZBwStYpuHBUhhFyYyeGG0eaGW6Ag5nwhBTMrVqzw+5nneaSmpmLChAlITEwMx7gI6RLJOhUUFwpmeA7JOmqJQQiJDhanB41WFwUxbQgpmLn11lvDPQ5CusWQNB0cF+gK6/CIGJKm66YREUJIcLHeybo7hZwz09TUhDfffBNHjhwBAIwYMQK33347DAbaAUKi1ycHKv22YwcjMum8hWPbbnpKCCFdIRydrHubkHYz/fjjj8jLy8MLL7yAhoYGNDQ04O9//zvy8vJQVFQU7jESEjZ7zzaG9TxCCAkXh1tARZMdlUY7BTIdFNLMzEMPPYRrrrkGr7/+OuRy6S48Hg/uvPNOPPjgg/jmm2/COkhCwkWjkIX1PEII6aye2sm6O4UUzPz4449+gQwAyOVyPProoxg3blzYBkdIuA1Jjw/reYQQEiqXR0SjzdVjO1l3p5CWmfR6Pc6cORNw+9mzZxEfTxcBEr2S41W4UJcCnpPOI4SQruAWRNSYHShrtPWIQOZAuRF7Tkd2aT6kmZnrr78ed9xxB55//nlMnjwZALB9+3b89re/xY033hjWARISTilxKiRqFWiwugO6ZgNS1+xErQIpcRTMEELCqyd1svYIIrY
dr8PavWU4UmnG2L6J+Oi+yREbT0jBzPPPPw+O43DLLbfA45GiSoVCgfvuuw/Lly8P6wAJCacRWXroNQrUt1LhlwHQaxQYkaXv3oERQnosQWRosrlg6gGdrJtsLny6vxLr91Wg3uLy3b7ndCP2nW3C6JyEiIwrpGBGqVTixRdfxLJly1BaWgoAyMvLg1arDevgCAk3UWSoNbfdd6nW7PS1PCCEkFB5O1mbekATyNJaC9YWleOrI9VBi45e1C8RQgSfY0jBjNFohCAISEpKwqhRo3y3NzQ0QC6XQ6+nb7UkOn2yvxJWZ9tbHq1OAZ/sr8T8QuozRgjpuJ7SyVoQGXaW1mPt3jIUnzUGHFfIOFw6JA23Te6HyQNTIjDCc0IKZm644QZcffXV+NWvfuV3+/vvv48NGzbg888/D8vgCAm3sw2WoLkyLbHm8wghpCMYYzDZPWiyuyBcqDpnFLM4PfjiYBU+3luOSqMj4HiiVoFrRmfh6tFZSIpTIlGrjMAo/YUUzOzatQt///vfA26fPn06fv/733d6UIR0lWqT68IndeA8QgjpKZ2szzbYsG5vOTYdqoY9SNG+wek6LCjsg+mDU6GUh7QZusuEFMw4nU5f4m9Lbrcbdru904MipKuk69u3S6m95xFCerdY72TNGMOPpxuxtqgcu041BBznOeCSQSm4trAPRmTpwXHRmUsYUjAzfvx4/Otf/8LLL7/sd/s///lPjB07NiwDI6Qr5CTFhfU8QkjvFOudrO1uAV8ersa6onKcbrAFHI9XyzFnVCbmjslCul4dgRF2TEjBzDPPPIMZM2Zg3759uPzyywEAW7ZswQ8//IDNmzeHdYCEhNPs4el4qJ3nEULI+WK9k3W1yYH1xRX47EAlzI7AFZa+SVosKMzGjOHpMdXWJaRg5uKLL8bOnTvx3HPP4f3334dGo0F+fj7efPNNDBo0KNxjJCRs1u8vb/d514/v28WjIYTEiljuZM0Yw8FyEz7aW4bvTtQhWG7yxAFJWFCQjbF9E6N2KaktIQUzADBmzBi8++67bZ6zfPly3HvvvUhISAj1YQgJq3d3BbbhCOa1b05SMEMIgcMtoNHmgt0Ve0GMyyNi67EafFRUjhM1gTs0NQoZZo3MwPyCLPRJjO06cSEHM+3xl7/8Bddddx0FMyRq1FvaLpjndarOhh0ldRGvnUAIiQynR0CjNTY7WTdYXdiwrwKf7KtAoy2w2nmmQY35BdmYNTIDOlWXhgHdpkufRayXbSY9D8e17z3JACz74gjWL7qEKgET0ovEcifr49VmfFRUjv87WgNPkLWkMTkJWFiYjYkDkiHrYZ9rPSMkI6SdZHz7ayMcr7bgQLkxYr1GCCHdxy1IQYwlSFJsNBNEhu9K6vDRnjIcrDAFHFfIOFwxLB3zC7ORl6qLwAi7BwUzpFfhOvBtxCWIKD4TucZphJCuF6udrE12Nz47UIn1xRWoCdJvLlmnxLwxWfjZqCwYtIoIjLB7UTBDepXRWQb8VNe+wo6M0VIpIT1VrHay/qneinVF5dh8uBrOINvDh2XGY2FhH0wdlAK5LLqq9HYlCmZIrzJxYArW769q9/nxmp7/jYaQ3iQWO1mLjGH3qQZ8VFSOPacbA47LeA7TBqdiYWE2hmX2zkbPXRrMTJkyBRqNpisfgpAOsdoDM/tbwwFI0kW+gRohpPNisZO1zeXBxoPV+Li4HGWNgTPKBo0CP8vPxDWjs5Aa37tbsIQ0B1VUVIQDBw74fl6/fj3mzZuHxx9/HC7XuQZ9n3/+OTIzM1u9n2XLluGiiy5CfHw80tLSMG/ePBw7dszvHIfDgUWLFiE5ORk6nQ4LFy5EdXV1KMMmBN+V1rf7XAagLEiZb0JI7GCMwWhz42yjDY02V0wEMhVNdvxjawmuf+17vPJ/JQGBzIDUOPz2ysFYc9cE3HFJ/14fyAAhBjP33HMPjh8/DgA4efIkbrjhBmi1WnzwwQd49NFH230/27Ztw6JFi/D
999/jyy+/hNvtxpVXXgmr1eo756GHHsInn3yCDz74ANu2bUNFRQUWLFgQyrAJgdnR/pkZAHjl6xPYUVLXRaMhhHQVxqSZmLMNdtRbnRCClb2NIowx7D3TiCc/PohfvLkbH+4ph7VFoT4OwMUDk/H360bj9V+MxexRmVDFULuBrsaxEDKfDAYDioqKkJeXh2effRZff/01Nm3ahO3bt+OGG27A2bNnQxpMbW0t0tLSsG3bNkydOhVGoxGpqalYvXo1rr32WgDA0aNHMWzYMOzcuRMTJ0684H2aTCYYDAYYjUbo9b1zLZGcM/+Vb7G3LHD7YmvUCh4X9UvC278cT/VmCIkRZocbTTHSydrpFrDlaA3W7i3HyVprwPE4lQxXjczEvIIsZBqiM20jUatEYlz4l+Q7cv0OKWeGMQZRlN4kX331FX72s58BAHJyclBXF/q3WKPRCABISkoCAOzZswdutxszZszwnTN06FDk5ua2Gsw4nU44nee2qZlM7b9wkZ6vqQM5MwCglPEorbHgUIUJo/oYumhUhJBwiKVO1rVmp69KrylIbZs+iRosKMjGzBEZ0ChpBuZCQgpmxo0b5+ucvW3bNqxcuRIAcOrUKaSnh9ZtWBRFPPjgg7j44osxcuRIAEBVVRWUSmVAO4T09HRUVQXfkbJs2TIsXbo0pDGQnq/R6rrwSS3EqWRwiwwNto79HiGk+9hcHjRYY6OT9eEKEz4qKsM3J+qCLn1d1C8RCwqzcVG/JPAx2PAxUkIKZlasWIGbb74ZH3/8MX7/+99j4MCBAIAPP/wQkydPDmkgixYtwsGDB/Hdd9+F9PteS5YsweLFi30/m0wm5OTkdOo+Sc/R0aUiHoCC55CkpV1NhESbWOlk7RZEfHO8Fh8VleNolTnguFrO48oRUsPHvslxERhhaDiOQ5xSBq0q8jNHIQUz+fn5fruZvJ577jnIZB1/Ur/+9a/x6aef4ptvvkGfPn18t2dkZMDlcqGpqclvdqa6uhoZGRlB70ulUkGlosxuEpxa3rGc9yaHB4W5iRiRRflWhESLWOlk3WRz4ZP9ldhQXIH6ILPCafEqzCvIxpxRGYhXx05NK7VCBp1aDp1SHjW5hGGtM6NWqzt0PmMM999/P9atW4etW7eif//+fsfHjh0LhUKBLVu2YOHChQCAY8eO4cyZM5g0aVLYxk16j45uaFDKONw3LS9q/oMlpDeLlU7WpTUWfFRUji1Hq+EWAj908vsYsKAwGxfnpcRMw0eFjIdOJYdOLYciCisLtzuYSUxMBNfO9buGhoZ2nbdo0SKsXr0a69evR3x8vC8PxmAwQKPRwGAw4I477sDixYuRlJQEvV6P+++/H5MmTWrXTiZCzuf0dOyb3KVD0jB5YEoXjYYQ0h6x0MlaEBl2lNZjbVEZ9pUZA44rZBwuG5qGBQXZGJQeH4ERdhzPcYhTyRGvlkMd5dvA2x3MrFixwvf3+vp6PPPMM5g5c6ZvhmTnzp3YtGkTnnzyyXY/uDdxePr06X63r1q1CrfddhsA4IUXXgDP81i4cCGcTidmzpyJf/zjH+1+DEJaCtbLpC0//tSAHSV1FNAQEgGx0Mna4vDgi4OVWLe3AlUmR8DxpDglrhmdiZ/lZyGpC7YvdwWtUpqBiVPK2j2JEWkh1ZlZuHAhLr30Uvz617/2u/2VV17BV199hY8//jhc4+s0qjNDWhrw2GfoSDij4IDsJC3+Mn8UBTSEdJNY6GR9psGGdXvLselQFRzuwE+Vwek6LCzsg+lDUqNyWeZ8SjmPeJUCcSpZ1DSo7PI6M5s2bcKzzz4bcPusWbPw2GOPhXKXhHSLjm7cFBhgdniwclspJg5IptwZQrpQtHeyZozhx9ON+KioHLtPBaZT8BwwZZDU8HFElj7qZzVkPOfLg1HJo3sZ6UJCCmaSk5Oxfv16PPzww363r1+/HsnJyWEZGCHRQASgUcqocB4hXUgUGZqiuJO13S1g86FqrNtbjjNB+rX
Fq+WYMyoT88ZkIU3fsY0w3c27nVqnlkOjiJ1lpAsJKZhZunQp7rzzTmzduhUTJkwAAOzatQsbN27E66+/HtYBEhJpHGNUOI+QLiCKDKbm1gPRGMRUmRxYv7ccnx2ogiVI8nHfZC0WFmZjxrD0qE+QVSlkiI+y7dThFFIwc9ttt2HYsGF46aWXsHbtWgDAsGHD8N133/mCG0J6CrPTA61SToXzCAkTxhhMdg+a7K6oawDJGMOBciPWFpXju5K6gHIOHIAJA5KwsLAPCnMTonpmQyHjEaeSQ6eSQ9nBGluxJuQ6MxMmTMC7774bzrEQEpXsLhGj+uiocB4hncQYg8nhgdHmhkeMrtYDLo+I/ztWg4+KylFSYwk4rlHIMHtkBuYXZCM7MTobPgLSdmqtSoZ4laJX9XQKOZgRRRElJSWoqanxNZ30mjp1aqcHRki0UCl4KpxHSCdFayfrBqsLG4or8Mn+CjTaAhvRZhrUmF+QjVkjM6BThbXObFhplDIpmVclj+rZoq4S0r/M999/j5tuugmnT58OyDjnOA6CEN0lpgnpiIevHEzbsgkJUbR2sj5ebcaHe8qw9VgtPEGWugpyE7CgIBsTByRHbZVehYyX8mBU8qjZTh0pIQUz9957L8aNG4fPPvsMmZmZvTIKJL0DB+Dm8X0jPQxCYk40drIWRIZvT9RhbVEZDlaYAo4r5TxmDJOq9A5I1UVghBcm4zlfHky0Jx13p5CCmRMnTuDDDz/0dcsmpKdiAA5VmjA6JyHSQyEkJkRjJ2uT3Y3PDlRifXEFaszOgOMpOiXmjcnGnFGZMGijr+Ejx3HQNi8jaWOoKm93CimYmTBhAkpKSiiYIb3Cjz/VUzBDyAVEYyfrU3VWrNtbji8PVwdtZTI8Mx4LCvtg6qCUqFymUSnO5cFE61JXtAgpmLn//vvx8MMPo6qqCqNGjYJC4R/J5ufnh2VwhESDb0vqcceUvEgPg5CoFG2drEXGsPtUAz4qKsee040Bx2U8h+mDU7GgMBvDMqNvh6Kc56FT947t1OEUUjCzcOFCAMDtt9/uu43jODDGKAGY9Dh8dJXBICQquDwimmyuoMXkIsHm8mDjQalKb3mTPeC4QaPA1aMzcc3oLKToVBEYYet663bqcAopmDl16lS4x0FI1MrPoRYGhHhFWyfriiY71u0tx8aDVbAGWeLKS43DgsI+uHxoWtTNdHi3U8f10Kq83SmkYKZvX9rdQXqPKYNTIz0EQiIumjpZM8aw92wT1haVY2dpPc4fDc8Bk/NSsKAwG6P7GKIqYZa2U3eNkCsAvfPOO/jnP/+JU6dOYefOnejbty9WrFiB/v37Y+7cueEcIyERZYqSb6CEREI0dbJ2ugVsOVqDtUXlOFlnDTgep5LhqpGZmFeQhUxD9FTplfEctEo54tW0nbqrhBTMrFy5Ek899RQefPBB/PnPf/blyCQkJGDFihUUzJAepckaWBWUkJ4umjpZ15qdWF9cjk/3Vwb9ctEnUYMFBdmYOSIjanJOOI6DRiF1p46j7dRdLqRg5uWXX8brr7+OefPmYfny5b7bx40bh0ceeSRsgyMkGlQabZEeAiHdJlo6WTPGcLjShLVF5dh2vDag4SMAjO+XiAWFfTCuXyL4KAkWlHIe8SoFdGraTt2dQk4ALigoCLhdpVLBag2c+iMklm0+XIN7pw2kBD3So0VLJ2u3IGLb8Vp8VFSOY1XmgONqOY+ZI6SGj7nJ2giMMJCc5xGnkmZhVPLomBnqbUIKZvr374/i4uKAROCNGzdi2LBhYRkYIdGiqsmGQxUmjOpDu5pIz8MYg9npQZM1sp2sG20ufLqvEhv2VaDe6go4nq5XYX5BNq4amQmdOvINHzmOQ5xSCmC0ysiPp7cL6V9g8eLFWLRoERwOBxhj2L17N9577z0sW7YMb7zxRrjHSEhEOQSGBlvghyshsS4aOlmX1ljwUVE5thythlsInBHK72PAgsJsXJyXEhXLNurmPBg
dbaeOKiEFM3feeSc0Gg2eeOIJ2Gw23HTTTcjKysKLL76IG264IdxjJCSiBIEhSauM9DAICZtId7IWRIYdpfVYW1SGfWXGgOMKGYfLhqZhYWEfDEyLfMNHhYyX2gqo5VDQduqo1OFgxuPxYPXq1Zg5cyZuvvlm2Gw2WCwWpKWldcX4CIk4tyBiWEZ8pIdBSKdFupO1xeHB5wcr8fHeClSZHAHHk+KUmDs6Cz8bnYnECH+B4DmpOzVtp44NHQ5m5HI57r33Xhw5cgQAoNVqodVGRxIWIV3B4RGx5sez+PlEKhZJYpPdJTWBdESok/WZBhvWFZVj0+EqONyBgdSQjHgsLMzGtMGpEZ/50CrltJ06BoW0zDR+/Hjs3buXKgGTXmP1rtO4aXwueJ6DKDIcqjChweZCklaJEVl6WjsnUSmSnaxFxvDjT41YW1SG3T8FNnzkOWDa4FTML8jGiCx9RAMH73bqOJWMqvLGqJCCmV/96ld4+OGHUVZWhrFjxyIuLs7vOHXNJj1NRZMDhypMMDvc+MfWEhytMsPtYVDIOQzNiMevpg/E5IEpkR4mIQAi28na7haw+ZDU8PFMQ2CNJr1ajjn5mZg7OgtpenW3j89LxnO+PBjaTh37OBZCfWqeD4xco7VrtslkgsFggNFohF4ffe3eSffq99hnIf2eVinD/ZcNwls7TqHB6kLL/2o4Tlrrf+G6MRTQkIiKZCfrKpMDH+8tx+cHqoI+fr9kLRYU9sGMYWkRy0FpuZ1ao6BlpGjXkes3dc0mpB1cHgEf7jmLWrMTHCcVyeIAMAAeUUSt2YllXxzB+kWX0JIT6XaR6mTNGMP+ciPWFpVje0ldQJVeDsDEAclYWJiNgtyEiAUPKoXUnVqnoqq8PVVIwczp06cxefJkyOX+v+7xeLBjxw7KpSE9jihKSYwcAAXP+z6UvT+7BRHHqiw4UG7E6JyESA6V9CKR6mTt8oj4urnhY0mtJeC4VinDrOYqvdmJkWn4KOd5qR6MSg6lnPJgerqQgplLL70UlZWVAduxjUYjLr300qhaZiIkHBgAjyDlyJz/7ZLjOMhkHDyCiL1nmyiYIV0uUp2sG6wubCiuwIZ9FWiyBzZgzUpQY35BNmaNyECcqvur4vIcB61KhniVImoaTpLuEdK7zZsbc776+vqAZGBCegLv5UIUGWTBPiObT+Ai21yY9HCR6mR9rMqMj4rKsPVYLTxB+jYV5iZgQWE2JvRPjsgyjkYpLSPFUVXeXqtDwcyCBQsASN9Eb7vtNqhUKt8xQRCwf/9+TJ48ObwjJCQKKHjALQIeEeB5EWAcOE5aZgInfVNWyHiMyU2I8EhJT+TtZG20u7utCaQgMnx7Qmr4eKjCFHBcKedxxbB0LCjMRv+U7v8Sq5DxiG9eRqLt1KRDwYzBIDXaY4whPj4eGs25tVClUomJEyfirrvuavf9ffPNN3juueewZ88eVFZWYt26dZg3b57v+G233Ya3337b73dmzpyJjRs3dmTYhHSajOcggkEQAZeHwTsVw+FcQDM4XYdR2dSMkoRPJDpZG+1ufLa/EuuLK1BrcQYcT9WpMHdMFubkZ8KgUXTLmLxkvFSVV6eiqrzEX4eCmVWrVgEA+vXrh0ceeeSCS0rbt2/HuHHj/GZwWrJarRg9ejRuv/1236zP+WbNmuV7XACt3hchXcnhCX4h8YY1iRoFlsweRlPcJCwi0cn6VJ0Va4vK8dWRajiDtDsYkaXHwsJsXDIwpVtnQjiOg7Z5GUlLVXlJK0LKmXn66afbdd7s2bNRXFyMAQMGtHp89uzZbd6HSqVCRkZGu8fmdDrhdJ77NmEyBU6PEhIO3rhFxnPok6jBxAHJkR0Q6RG6s5O1yBh2nWzAR0VlKDrTFHBcznOYPiQVCwqzMTSje+t00XZq0hFdmm4ejiz7rVu3Ii0tDYmJibjsssvwzDPPIDm59YvGsmXLsHTp0k4/LiEXIjLpw17Oc74KwaP60DITCU13drK
2Oj3YdKgK6/ZWoLzJHnA8QaPA1aMzcc3oLCTrum82nLZTk1B1/965Dpg1axYWLFiA/v37o7S0FI8//jhmz56NnTt3QhZ0SwmwZMkSLF682PezyWRCTk5Odw2Z9DoMLg+Dy+PCdyW1FMyQDuvOTtblTXas21uOjQerYAvSr2lgqg4LCrNx2dC0bgsmOI5DHG2nJp0U1cHMDTfc4Pv7qFGjkJ+fj7y8PGzduhWXX3550N9RqVSUV0O6jUcElHIOHoFh06Fq3DM1j/JmSLt0Vydrxhj2nmnCR0Xl+P5kPc6fL+c5YHJeChaOzUZ+tqHbclJoOzUJp6gOZs43YMAApKSkoKSkpNVghpDu5vEwqBQyVBvttNRELqi7Olk73QK+PFKDdXvLcarOGnA8TiXDVSMzMb8gGxmG7mn46N1OHaeSQ0HbqUkYdWkwE+4Iv6ysDPX19cjMzAzr/RLSGSKANJ0SNo+IBpsr0sMhUaq7OlnXmp1YX1yOT/dXwhSkV1NOogYLCrNx5fCMblnW4TlpO3W8mrZTk64T0QRgi8WCkpIS38+nTp1CcXExkpKSkJSUhKVLl2LhwoXIyMhAaWkpHn30UQwcOBAzZ87symET0mECAAXPIUmrjPRQSJTpjk7WjDEcqjBhbVE5vjlRG9DwEQDG90vEgsI+GNcvEXwXLyVxHAeNQupOHUfbqUk36NJgxmw2t3n8xx9/xKWXXur72Zu4e+utt2LlypXYv38/3n77bTQ1NSErKwtXXnkl/vSnP1FODAmJ2IVFx4w2N8bkJmBEVvduXyXRqzs6WbsFEVuP1WJtUTmOVQd+3qoVPGYOlxo+5iZru2wcXko5j3iVAjo1bacm3SukYKa6uhqPPPIItmzZgpqamoAZmPY2mpw+fXqbszebNm0KZXiEBBWsJHvYcAz3TaPkXyJ1sm6yu2HuwiaQjTYXPtlXgQ37KtFgDVzazNCrMa8gC1eNzIRO3bWpkXKeR5xKmoVRyWkZiURGSO/y2267DWfOnMGTTz6JzMxMmkIkMaEr81kcXZzMSaJfd3SyPlFtxtq95fj6aA3cQuBjjO5jwILCPpic17UNHzmOQ5xSCmC0ypjaR0J6qJDehd999x2+/fZbjBkzJszDIaTrdGU+i1sE/vL5EWz49SU0O9PLiCKD0S41geyKTtaCyLC9tA5ri8qxv8wYcFwh43D5UKnh48A0XdgfvyV1cx6MjrZTkygTUjCTk5PTZd88COkKosi65ELT0qEKE1bvPoOfT+zbpY9DokNXd7I2O9z4/EAVPi4uR7UpsOFjcpwS14zJwtX5mUjowkBdIeOltgJq2k5NoldIwcyKFSvw2GOP4bXXXkO/fv3CPCRCwmtHSR1WbitFSZAEyXBiAF75+gRuGp9L31p7sK7uZH2m3oa1e8ux+VAVHEGqAg/NiMfCwmxMHZzaZcEFbacmsabdwUxiYqJfbozVakVeXh60Wi0UCv828A0NDeEbISGdsKOkDo+vOwCL0wNNN3woV5uc2H+2CWP6Jnb5Y5Hu1ZWdrEXG8MNPDVhbVI4ffmoMOM5zwLTBqVhY2AfDu3DHnFYpp+3UJCa1O5hZsWJFFw6DkPATRYaV20phcXqQoVd3aZ0PLwbgi0NVXRbMiKJUT6TB5kKSVokRWXqaBeoGXdXJ2u4SsPlwFdYWleNsY2DDR71ajp/lZ2LumGykxndNSQrvduo4lQxyWkYiMardwcytt97aleMgJOwOVZhQWmNBolYJjuMg57vng9oahgqvwYKW70/WY+W2UpTWWOAWGBQyDnlpOtw3LQ+TB6aEYeTkfFan1AQy3EFMldGBdXvL8fnBSlidgTvh+qfEYUFBNmYMS4OqC2YUZTzny4Oh7dSkJwgpZ+bzzz+HTCYLqMS7efNmCIKA2bNnh2VwhHRGg80Ft8CgbP62qVZ0TzBTkNO5WRlvjk/LoCVZp0SN2QlBZEjUKqGU8XAJIo5UmvH4ugP4y/xRFNCEUVd0smaMYX+
ZER8VlWNHaV1AlV4OwKS8ZCwozEZBTkLYl3k4joO2ubmjlpaRSA8TUjDz2GOPYfny5QG3i6KIxx57jIIZEhWStEooZBxcggg1330f3tXGwOWC9mqZ4+MNWpweAUerzBBEhtwkrS8hU83LkKHnUWVyYuW2UkwckExLTp3kcAtosIa3k7XLI2LL0RqsKypHSa0l4LhWKcPskRmYV5CN7ARN2B7XS6WQAhidiqrykp4rpGDmxIkTGD58eMDtQ4cO9eu1REgkjcjSIy9NhyOVZmTo+W4LZl76ugRDM/VIjVd3KKfl/Byfc+PlfP9bZ3FBp5aD897GcUjQKlBaY6GO3Z3QFZ2s6yxObNhXgU/3VaLJ7g44np2gwfyCLMwckYE4VXgLz8l5XqoHo5JDKac8GNLzhfRfkMFgwMmTJwO2ZZeUlCAuLi4c4yKk03iew33T8vD4ugOoMjmRoFVc+JfCwOER8Zv/FkMtlyFNr8KN43Nxw7gcHKkyt5m4e36Oj5dHFMGYlOfg9AhwuES/bscqGQ+jyKhjdwicHgFNNjesYUwOP1olNXzceqwWniBbt8fmJmBBYR9MGJAU1oaPPMdBq5IhXqXolm7YhESTkIKZuXPn4sEHH8S6deuQl5cHQApkHn74YVxzzTVhHSAhnTF5YAr+Mn+ULwelu9icHtidHtRZnHhq/UEs++II1HIePMe3mrh7fo6Pl5znwXEAOICJaN4WfO5i5RRE6tjdQeHuZO0RRHx7og4fFZXjcGVgDzCVnMcVw9MxvyAb/VPC+4VP05wHE0dVeUkvFlIw89e//hWzZs3C0KFD0adPHwBAWVkZpkyZgueffz6sAySksyYPTMHEAck4VGHC1a981y2PKTCpNgjPSX+3OgU43QKyE7RQyvmgibvn5/h4qRU8VHIedpcAjoPfrizGGJpsbgzLjMeILD1t3b4AtyCiyeaG2RG47BMKo92Nz/ZXYn1xBWotgVV6U3UqqeHjqEwYNOGbGVTIeMQ3LyPRdmpCOrHMtGPHDnz55ZfYt28fNBoN8vPzMXXq1HCPj5Cw4HmuW/NJOCBgt4rIgHqrE32TtTCo5aizOPHXTcfwYb8kyOV8qzk+HMchRafCmQabtCzBMYgig7P5wqxTyXDftDzaut0Gbydro82NE9UWGB0uGNRKDEyPC2mp51SdFR8VleGrIzVBdzyNyNJjYWEfTBmUErakWxkvVeXVqagqLyHn41gHmyy53W5oNBoUFxdj5MiRXTWusDGZTDAYDDAajdDru65yJokN/R77rFseh+MAMKmInpeC5yACUMp4eEQRYnO0M6pPAh6dOQSTB6a02M0kIEGrgErG+4IWGQ+kxatQb3HBLTIo+HPBCoCAXVAuQURjc7ATzVu3u3I2qWUn66LTDVi9+yzO1lt9r19OchxuGp+DgtwLb6cXGcP3J+uxtqgcRWeaAo7LeQ7Th0hVeodkxIdl/LSdmvRmHbl+d3hmRqFQIDc3F4IQvqx/QnqcIF8RGBgEEXAyAQoZD14GeETgZK3Vb8mpZY6PsfmiOywzHvdNy/Mtl7W88APArat2B+yCioWt28Fq6oRjNun8TtZ7zzTi718eh80lQK9WQC/j4BYYTtZa8Pcvj2PxFYNbDWisTg82HqrCur3lqGhyBBxP1CpwdX4Wrh6diWRdeKr00nZqQjompGWm3//+93j88cfxzjvvICkpKdxjIiTmMXg3VEs4SIELAMhlHHiOg8gAnmNI1SlhdHh8AUfLHJ9gsxXnL5cdKDMG3QUFRPfW7WA1dTpbCJCxc0GMtwmkyBhW7z4Lm0tAik7p29auknNI0SlRZ3Fh9e6zGJ2T4LfkVN5ox7q95dh4qAq2IFu2B6bpsLAwG5cOSQu6/VlkDCXV1nYvadF2akJCF1Iw88orr6CkpARZWVno27dvwHbsoqKisAyOkFjWcnKG4wDGpKCGBwfGGDwig0bBQ6OUgeM5v4CjIzk+re2C8orGrdut1dQJdTaJMQa
Tw4MmW2An65JqK87WW6FXK3yBjBcHDvFqBc7WW1FSbcWg9DgUnWnCR0Vl2HWyIWCCjeeASwamYEFhNkZlG1pd9tl7prFdS1ocxyFOKYNOLYdWGd5aM4T0JiH91zNv3rwwD4OQns17fZXzUpDjERlkHIfUeOlC3pmAo7VdUF7RuHW7tZo6QMdmk9rTydrokHKM9LLggYdSxsEkith8uArLNzbip3pbwDk6lRxXjZKq9Gbo1W0+t/YsaU3KS5FmYWg7NSFhEVIw8/TTT4d7HIT0WFoFDxnPw+r0QIQ0RaNR8EiNV0PXXPm1MwFHW5WOz9+6HS3CMZvU3k7WBrUSCl4KKFRy/8DBLYiot7pgdniwdm95wO/mJmkxvyAbV45Ih6YdO4jaWtJK1alQZ3Vh7d5yzC/oQ0EMIWFE85qEdBEOgErGYXCGHmvumIAb3tyFk7VWpOqU0tJSc9DR2YAjWKXjlrugvFu3o+ni2ZnZJKvTg0Zb+5tADkyPQ05yHE7WWpCiUwJMqtLcaHO3WjRvfP8kLCzMxti+iR3auh2wpMVJlXllvJQnlRSnxKlaa9TlLxES60IKZgRBwAsvvID3338fZ86cgcvl/+2poaEhLIMjJJYxAG4GnG2w4kSdFY/OHILH1x2A0eEBx3NhDTgutAsq2rZlhzKbZHN50GhzwxmkCWRbybY8x+Gm8Tn42+ZjKG9ywCMwuILM5qgVPGaOyMD8gmzkJmlDel7eJa0EOQ+5jAfPwe+5RTp/iYoqkp4qpGBm6dKleOONN/Dwww/jiSeewO9//3v89NNP+Pjjj/HUU0+Fe4yExCxBZGiye1BndeLSIWldGnBcaBeUVzRc0Doym3ShTtYXSrZtsLpwoNwIs1MIuispKU6J68f1weyRmdCpQ5+sVsp55CbGQS3nwYCgW6ojmb/UVdvgCYkGHS6aBwB5eXl46aWXMGfOHMTHx6O4uNh32/fff4/Vq1d3xVhDQkXzSEvdVTTvfH/7n9FYOFZq/eFyCfjnNydxusGKvklxuHfqACibGwN2daARbRc0v/GcVwiwsG/iBTtZn59sq2hOtjU53JDzHPomx2FfWRPcQuDH3OB0HW6ekIvJeaFX6ZXxnFQPRi2HSi6DKDLcump384yTKmDGqcrkxLDMeLz9y/HdGkC2tg0+Fooqkt6rS4vmAUBVVRVGjRoFANDpdDAajQCAn/3sZ3jyySdDuUtCerTisiYsHNsHr39Tile3lsJsd0MEwAP4945TWDQ9DyOyDF0aaHRFXZfOCjabNDAtDiaHBxVN9jZ/N1iyLWMMLo8Iu1uAwy2i1uK/nKOQcZgxLB0LCrKRl6YLacwtt1NrFP5VeaMxfynYNnjGGBgD4pQyNNnc+MfWkqgsqkhIe4UUzPTp0weVlZXIzc1FXl4eNm/ejMLCQvzwww9QqcJTAZOQnmT7iVq8tq0Uz206BkFkkMs4yDlpy7bR5sbyL45Cr1FAxnNdEmiEu65LOHlr6ng7WVcaA6vsBtMy2VYUAaPdhSa7G57zm2IBSNYpMXd0Fn6Wn4mEEJd41ApZu7ZTR1v+0vnb4C1OD2rNDjg9Irzz8rtPNWL17jP4+cS+3Tq2UEXDUimJLiEFM/Pnz8eWLVswYcIE3H///fj5z3+ON998E2fOnMFDDz0U7jESEhZikItcd6kzOfDK1yUQRAalnAPPSVuSOU4ak1tgaLS5MSw9DnK5tOQUzkAjXHVd2hLqBSbUTtZGhwsOjwiHR9paHexfV8ZzuG5cH9w2uR9kPIeSaitO1Fja3WRSIeN9y0iKDnSnbm/+UndouQ3e4vSgvNEOgTHIeQ4cB4hgcHtEvPz1CQxIiYv65aZoWyol0SGkYGb58uW+v19//fXIzc3Fzp07MWjQIFx99dVhGxwh4XSowhSxx7Z7RAhMbG5lIF0UBcbgEUS/7ton6+3IStD46s+0DDQOlBvBc1xIF8eurhIcygXG28na7PCgI6l7ImP44acGvLPzDEyO4Fu
r41VyaFUyMJFh2qA0HCw3BiYJJ2kxZXAqMg1qv+CG5zhoVTLo1YpOdafu7k7trfFug3d6BNSaHRCY9Px9XdkZIOMZnB4xant4eUXjUimJDmGpMzNp0iRMmjQpHHdFSJepNbdv+aIreERpq7ai+RohMOnb8PmXcJdHRHmjHdmJ5wIalYxHrUvA4+sOoNHq6vC3UVFkaLC4IIgiTA43DBpFwOxMZ3bZdPQC07KTdUeCGLtLwKZDVVi7txxljYH5NDwHJGgUMGgUkMs41FlcGJCqg9npwoqvTvhV5DU53NhX1oS9Z5ugVcqhVfDolxKHe6bm4bJhaT2qO7V3G/yBMiOcHrF5Rqa5xhEYBJFBrZAjRaeMyh5eXtG8VEoiL+RuZu+88w4uvvhiZGVl4fTp0wCAFStWYP369WEbXDQTRYYDZUZsO16LA2XGiC5hkAvbUVKH5RuPRezxve8Ot8DAIM3IeJtRtvzYlfFSoFNrdvgu9I12FywON840l9mPV0uzDt5gYUdJXauPu6OkDreu2o3nNh2F2elBeZMdp+qsfsXivHVd8tJ0HS7a573AmB1uGNQKuAURTo8IlZxHhl4Fi1PAym2lEEUGUWRotLpwtsEGo93d7kCm0mjHyq2luO5fO/HS1yUBgYyc56BVypCVoEFSnBICA+osLmiVMtxwUR+s+aHMlySskvNSwTyrW5oRY4AoiojXyHGyzoY/fXYYO0vrO/QaRDtvUrJKzsMjMin5FwwiY/AIDDzHITVeBZVMBncHZ+e683OwI0ulpPcJaWZm5cqVeOqpp/Dggw/iz3/+MwRB2jqZkJCAFStWYO7cue26n2+++QbPPfcc9uzZg8rKSqxbt86v7xNjDE8//TRef/11NDU14eKLL8bKlSsxaNCgUIYdNrRmG1u8MwcN1sg3WhSZNPvibTrZ3N3AR9oizMHpEeFwi1DJOVSbnBAZYHd7YHcL4DhAJZchRaf0BQvBvo2eP2OikPMob7TD5hJQ1mhDlkEDhZzv1C6bQxUmHK4wwu4SYXLYpOfFASq51K4hQatASbUZ35+sR3aiJqAJZGsYY9hXZsRHRWXYWVqP83+NAzA5LxkLCrPBGMN7P5ThbL0VVpcHCo7DgFQdbhqfgzil1EQyXiWH083gET2ot7p9Sy3gOLhFBg48MvTyHvvtfvLAFNx/+SD86dPDEEQRoiD9O6kVMqTGq6BTyWF3Cx2anevuz8FYbKhKuk9IwczLL7+M119/HfPmzfPLnxk3bhweeeSRdt+P1WrF6NGjcfvtt2PBggUBx//617/ipZdewttvv43+/fvjySefxMyZM3H48GGo1W03e+sqtGYbW1pOTafqVK3mWHQHDtIMjd+FucXfeQ4QRIDnpG2zVpcHlUYPBJGBByDjpWJsjDHYXQIqmuxIjVcHXRoINiWvVsjAJ3GoMTlgdwuoMNqREqds1y6b1pJ7vyupQ6PNDY4D5DwvdQcHYHeLKGuwIUOvhlNgONtoQ4bhwv/Nujwithypxtq95SittQYcj1PKMHtUBuaNyUZWgsZ3+5jcxKAVgH/4qQE2lwiz0wO3IEIUAdH7b8Fx4DmAiYBHFMFxsrAkQkerm8bnYuPBKhysMMKglkMhk0Gt5H1b2jvSUiMSn4Ox2FCVdJ+QgplTp06hoKAg4HaVSgWrNfADqDWzZ8/G7Nmzgx5jjGHFihV44oknfDM9//nPf5Ceno6PP/4YN9xwQyhD7xRas409LaemVYqQV1XDxhvQtCTjgNR4NTRKGWrNTthdHogA7E4PAAYO0oyNW5BmdLzLU6IA1FsciFcrfd9GvUHHnjONOFppRoLWPz9Gp5IjLjUORpsbNpeAR2cNw9wxWW2+X1v7Bn7P1AHYdKiq+Tlw53YGMQY5B7hFhhqLE3qVDAZ12xeYOosT64sr8On+ShjtgbuashM0mF+QjVkj06FVBn5s8RyHwRmBdWNqzU7Y3B6AAQo5Dw7SchcD4PZICdneQAzo2d/ueZ7
Dr6bnNQchAhK0MjARcAhCh2bnIvU5OCJLjwGpcThYboJBo4BCxkOt4H11c6KxoSrpPiEFM/3790dxcTH69vWvSbBx40YMGzYsLAM7deoUqqqqMGPGDN9tBoMBEyZMwM6dO1sNZpxOJ5xOp+9nkyl866fdsb2VhJff1HSE05oYAIUMcAtAsk4Bu1NAnEoOrVIGhUwGgYlgTNrdxPMcGBicggg0Bwbn3xcAODwMKkFEklaJHSV1+MfWUhyrMsPqkpakbC4P0vTnunMDAAcOerUCDo+IRK2ize3DbX0D/+2H++HyiFDJZXALInhIkRYDAI6DjJcK2CWnxGFgelzQ1+RIpQlri8qx9Xht0CWosX0TsbAwG+P7J7W74aOc56FTy6FVyPD9yQbIeA6iyMBx0uvKNQczDIBHYIhTSTMUQM//dh+OGjiR+hz8/mQ9jHY3zE4PjA43ZBwHlZyHQauAy8OisqEq6T4hBTOLFy/GokWL4HBISYq7d+/Ge++9h2XLluGNN94Iy8CqqqRvfOnp6X63p6en+44Fs2zZMixdujQsYzgfrdnGHt+2VEGAw9W+LstdSRClmZiLB6Rg85Fq1FtdaLD6Lz/JeQ7ZBjUsLgEeR+ul/L08gogmqwuPfLQPDVYXGGMQm3dPWV0CzjbYkJOk9QtonIIIUWRYseUEakyOoDkPF/oGfqbBDrvbg6wENSoaHXB7RClYaM4DEgRpBmnKoBS/QMQjiPjmRB3WFpXhcKU54PkoZBwKcxMxc0QGpg5OaVcQ491OHa9SQNPcGuJAmREnay1Ii1ej1uxsTnaFb3yA9BrpNYqQllpiVWdr4ETic7BlUJ2hV6PJ5oLTI/XacnhEDM+Mx5LZw2iJvxcLKZi58847odFo8MQTT8Bms+Gmm25CdnY2XnzxxYgs/7S0ZMkSLF682PezyWRCTk5OWO6b1mxjz4gsPZJ1ShytMrc7+bQriQyIU8mwr8wIpYwHD84XWJw7h6Ha4oTb077gizGGP3x6CLVmJzgAchnfPCsizUB4RIYqox15aTrfRdtbAfZsgxVJcSq/GZcl6w7grikD4BFZ0KUqQPoGrtfIYXG44fKISDeo0WBxwSUIEEUpYFDIeWgUMozNTQIgVTr+9EAF1hdXoM4SeKFL0CgQp5LD6fbgeLUZp2ot+OxApa9hZDAapUxaOgtSldd70U2LV0Ip51FrdsLp8Q8OeUjBo93dsaWWWNeZGjjd/TkYLKhOjFPA4RLhFgQYHR4YNEpMHJAclscjsSmkYMZut2P+/Pm4+eabYbPZcPDgQWzfvh19+vQJ28AyMjIAANXV1cjMzPTdXl1djTFjxrT6eyqVqstaKnjrNUhN5PiAJnK94VtdrPn+ZD1qzE4pkIl8LAMASItXwezwIDdJC4vTg7JGm99xxgCHu/2zSC6B4XS9FRy8eSHSjiiFjEl5NpAScq1OD2Q8jyabC3a3CDkHJGikXCIOHNS8DDqViPImO/74yWGo5TwsLk/QpSqRMWjkPHgeMNs9yExQIztRDaebQWAi+Oay+QNSdeB54PlNx/DV0Rq4ggRoI7P0KMxNwFdHqmFzefwaRp6steDvXx7H4isG+wIahYxHvFoOnUoOeRtVeVtedHUqOeJUMjhcIjyiCJdHRKPNBZdHhNnhgUbBItZuINZ09+dgsGUtDhw0Shk0kEEhl+FkLS3v93YhZUTOnTsX//nPfwAALpcL11xzDf7+979j3rx5WLlyZVgG1r9/f2RkZGDLli2+20wmE3bt2hWxAn3eeg06lQxVJifsbgGiyGB3C6gyOXvNt7pY4f1GJ4gMuUlaaJSRTwBO0Slhskv5JwB8264BaTYD6FjMJeelInQeUUoS5lpUrZHxHBQy3ndLrdmFRqsLNpcAd/PW79MNVvxUZ4PF6YHF6UFFk0OqCcMY4jVy8BwHh1tAeaMdFqcHIpMCJLdHhFNg0Knk0ChlqLO44PRIrRpkPA+zQ9rhZLS7cfc7e/D5wSq/QEb
Oc7hieDr++fNCrLhhDA5VmmF3i75aMHxzPkSKTgmbS8B7u88iTilHVoIGOUlaJGiVbQYywLmLbqNNqmnjvQDGqxVIilNCq5RjVJ8EvHDdGLz2i3F4+5fjKZBph+7+HGzPslZH6+OQniekT/eioiJMmTIFAPDhhx8iPT0dp0+fxn/+8x+89NJL7b4fi8WC4uJiFBcXA5CSfouLi3HmzBlwHIcHH3wQzzzzDDZs2IADBw7glltuQVZWll8tmu7mTaAblhkPm9ODGosTNqcHwzLjaVt2lGn5jS5erUD/lNC6JIeLUsbD6ZaaKZrsbpTWWqULgTd/o4MzR3KeA2Pw5ZSITApCWItwSMZzkMuk3JXB6TqYndIuJgZpi7IgAjaXB+WNNlQZ7RCZ1ATTe/9qBQ9wUp5LtckBl0do3g3EYHa4kZcWj0dnDcGAVB0cLg/qrE40WJywuUU0WN0oqbH4jTlRq8Atk/pizd0TsWT2UAxOj/drGNkyGAMnbUdP1CpR3mhDjdnZofYCF7roxqvleHTmEEwfmoZRfQz0JaQD2vM5GK6Cei1n2IKh5f3IiabisSEtM9lsNsTHxwMANm/ejAULFoDneUycONFXDbg9fvzxR1x66aW+n725LrfeeiveeustPProo7Barbj77rvR1NSESy65BBs3boxYjRmvaGoiR1p3/je6SJeodwkilHIeIgOqzU543y7Btmu3h+93mu/A46uXIi3FyJq3rIrNszYHyo3wNCf6ugXmuw/GpMrEruZj3sJ3CpkMKTqV1JgQDE63ALtbWkIyO9zQKmW+fJbUeDXe3vETtpfWB11KGpimw7WF2Zg+JA1Kuf93KKPDBbfIoG8OojiOg4znmpN1Ocg4DmanJ6Rv3tHWwbonaetzsKMF9dpqUkrL+9Ep2orHcqwjzVGa5efn484778T8+fMxcuRIbNy4EZMmTcKePXswZ86cNncbdTeTyQSDwQCj0Qi9nt7svcmBMiPueedHxKnkvm/0B8qNER7VOW0FMecf41v87K0zo5RzcHmkHTpCkDuSNfdKYEyqyOvty8PzHFweMaCqbksqOYf+yXEQAdhcAurMDjg8DGqFlNQ7ICUON47PgciAtXvLsetkQ8Bz4TngkkEpWFjQByOz9a0Gk8erLHhqwwHEqRTQKmUBu5fsbgE2pwev/WJct3f0Jh3X2nb+xuYE6/NnsNtzUTx3nwIStAqoZDyczd3Wg90n6Vod/TcOVUeu3yHNzDz11FO46aab8NBDD+Hyyy/35bBs3rw5aDE9QiKhrW900aA93yI4AIlxClgcUiVgaVkJSIpTosnuBpq3GvNAQHAiMCmgSdAqpJmX5q3T7cmF9ggMFpcAbXMQ6A2geO9uKIsLz20+jmqTM+B349VyzBmVibljspCub30WleM4xClluGRQMoZk6HGk0ow4pf8yUri+eUdLB+uerqMF9dpbSbg9M2wUsHaPaC0eG1Iwc+211+KSSy5BZWUlRo8e7bv98ssvx/z588M2OEI6w5sz8fi6A6gyOZGgVUR6SO2mlPPQqeTINKjRYHVBUEpdo739dNCcXaLkeXgYg1ImBSne3UteGrkMd17SH69/ewoc15wsLLA2gxk5J+XTNFicYHFKqQ6NCChlHJRyHiaHG032wLYQfZO0mF+YjSuGp0PTRm6LWiGDTi2HrsV26vP/nc7/5k2J9bGhIwX1RmTpO3RRDOeyFgldtBaPDSmYAaSt097t017jx4/v9IAICafzv9HFCgXP4cUbxmByXorvwztBIwVjTXY3TtVZ8cLmY7C5BMh5rjm3BOA5vrkAH4MgMCjlPDINWmgUMjjcIhxu4YKzMgzS7I/TLSX9eqQixHAJDK4grQbG90/CwsJsjOub2Orsl0ImBWc6tRyKILtSKLelZ+hIQb1QLorBZtioX173itbisSEHM4TEipbf6K5+5btID6dd5DIO8WpFq8sjSVqlVKYfQMs5EI47tx+I8dIMTEKcAnlpOvz
wU2BeS0syDr5WCr7Zm+Zc3vN/j+OAOKUcShmH2yf3D9oXiec4xKnkiFfL27ULKVKJ9bQ8ET4dKagXjotitC559GTRWjyWghnSK8RazoTR7sED7+3FXVMH4KbxuQEfxCOy9MhNjsP+sqbmgEbKZfHOyogig0ImJeumxKkwdVAKdpTUtfp4HODLpXEHyyZupuClb8x6tQIcB9TbXDA6/C82WqU0AxOnlHU4T6m7/51oeSK8OrLz6FCFqV0XxQaLC9uO1wYNNKN1yaMni9bdZZGvIkYICcAAnG6w4ekNhzD31e98gYi3rsO3JXVYWJgtbbP2MLgFAS5BhNMjwi0wCAxwiyKSdUoMy4jHNyfqoJK3/p+7t/FiazQKGbIMavRL1iKxeVbIJTAoOA4GtdQuIDlOhdwkLTIMUrXgaEu4Pp93eeJIpQlxKjnS4lWIU8l9yxNtBX8kuI4U1Du/qGFL3pYbNreAv248gkfe34d73vkRt67a7ffvQgX1ul+0Fo+lmRlCopRSJiXsHq0yY8na/fj5xL745kSd3yxCVoIGZxpsOL+0Cw9pS3aN2Ym3d/6EY1UmpMSrpP5EbhE8DzARuFAbSw5S/kx2ohp8i4J2UtE8Dwan6zBtSAo0ytj6KKHlia7T3vynYAn63sTvWrMDVqcArVIGnVrRah5MtC559HTRmOMWW59AhPQScl5qCwCIEBlDlcmJ5zYfQ7xKjqQ4FRS8VEiuzuwCBw4yTlpi4jip2JxKLkNSnAJmh4D3fjgDt4dBr+aRrFOhqskOQWy7xk28Wo5UnQpmpxt1FheqTc7m5EoOHlEKZPRqGX5z+aBOBzKRyFmh5Ymu1d78p9YuihzHQauUITdJ22agGe4lD8qfar9oKx5LwQwhUYhrbiEgiAwipMq+AGDjBChkHlicbjg9UrdtgUkzMRkGNZTNfY3kMk6KVtQc6i0ugAOsLqn4nNBKnRmpJo0SiVo5eE6atterFbC5RGTo1TA73LC5pCn94Vn6sHwDi1TOSrTuyOhJ2pv/dP5FscHiwl83HoFOLe3es7sEeEQRcp6HWsEHBJrh2tZP+VMdF025iBTMEBKFWkvCdbhF2N0O8M0tC8ABApMCnhqzA5kGDZQK3hetKHip9owoMjTaArdVeylkQL/kOL/eSBzHQRAZ4lUyPP8/o8FzXFi/gUVySy0tT0SXlhfFbcdr4REBl0dEpdEOp0f0tdhQNedmtcyDCceSB23vjn0UzBASQ1iLv3CQLgLe0r+iCNRbXMhOVIOJgNHhRqPNDU+QvgU8BylJF4DdI0DhvaBz0pZqGcdJu5WsUvPAUdnhbcQY6ZyVaN2REcvCtUSTpFVCZCIqjC6IDM11lKT3vt0tosJoh14t9ws0O7PkEen3IgkPCmYIiRG++jHNf0TGWvaYBscBTo+AKqMTVpcnaO8ljUIGOQ/IZByUPI+c5Dhc1DcRn+6vQL3VjUStAgp511ffjXTOSlvJp1R1uOPCuUQzLCMeApNaaijlnK9XFwdAzjO4PNLS6rCMeL/fC3XJI9LvRRIeFMwQEiO4c5Mwvm7XLYsreFemzE7/VgMcgMkDk7GwsA9GZetRWmOD0eFCcpwKhbkJ0GsUmDo41XcxMjk8Xb4zIRpyVqJxR0YsCvcSzZEqM2TNndMFEQDPpJkZBgjNHeBlHIcjVeawBBfR8F4knUfBDCEx4vz+9iJjEANbJPnwHHDJwBTcPXUAshI0AKQLwbj+idCp/KvydvfOhGjJWYm2HRmxRhQZ/rG1BE02FwwapS+3Ra0IfYmmweYCz3HITtSg3uKC0yOAiefuN1mnhM0lhC24iJb3IukcCmYIiVGtFepNiVPi8mHpuHliDnQqhW+bq04lh7aNqrzduTMhmnJWomlHRqxZvfsMdp9qhMgYLE67L0k3NV4qnBjKEo03uFDKePRL0cLhEs/tZlLycLhFKHgxbMFFNL0XSeioAjAhMeJCDSJlHIdpg1Pw7p0TcM+0AUjWqZGsk6r
ypupUOFlrxTcn6nCgzAgxWEJNN4rWKqKk/XaU1OHlr0/ALUhFGOW8lN9id4sob7TD4vSEVIG3ZWVgMECjlCFerYBGKQMY0GRzIy9NF7bggt6LPQPNzBDSQwiM4dvjdfifszvxq2l5uHf6QADRUz/j/N0uEwckU85KjPLuAHK6RUgVArjmGQ0GGSfVRaoyOpCVoOrwEk0kkrMpfyr2cez8phg9jMlkgsFggNFohF5P04S9Xb/HPov0ELqEnJdyZBikjtcynsPvZg3BiCxD0OTMRpsLChmPWyb1wyUDU7o8T6StgIpyVmLPgTIj7nnnR2hVMlQZnXC4BfAcB48o1YTxXlTkPDAsU4/1iy7p8L+p33umObjo6iCcKgBHl45cv2lmhpAYJIN/XyUZz/u2sPKcCJeH4dX/K8WIrPiA+hkeN4PdJaDB7cLfvzyG/+w4hYHpHf8G2t4PfipI1vN4dwCpZDKkxqtwtsEGlyBVqW75DhBEqT/Y9yfrO/xvHInkbMqfil0UzBASg85vEMkYfFcRnuMhl4kwOdw4VGFCavy5QMbi9KC80Q6RSbM3YIBcxnc4sGjv0hUVJOuZWu4AilPJoJBJ1aKBc7MyHKQWG26BhfxvHAvBBc3mRAcKZgiJcRykbast8RzgYYDLc65+BgNDrdkJkTFf7yZPc1CToVe1O7DoyExLrBYkowtU21ruADKo5fCIDEq59D4TRQaRMWkbdZwSDo8Ylf/G4RAt+WiEghlCYh7HBQYzYnO9D6X8XP0Mh0uE0yNAxnPgwEGEVIxMzvPtDiw6OtMSiwXJ6AJ1YS2TdOssTogig1SihYMIadkzrfn9EY3/xuFAy6fRhbZmExJjOEgzL14yjvNrECkyER6BQa9WYESWHo02NxhjvuRMqZoqg0dkUMmlTsQA2rWNtiMzLYD/ckQw0VaQzHuBOlJpQpxKjrR4FeJUct8FakdJXau/K4oMB8qM2Ha8Niq2v3c17w6g/qk6AIBHlAo5ahQ8shM10Kmk78rR9m8cDucH9WqFDDzPNRcLVMHiFLByW2mPfw9EE5qZISTGeLsYeAMaj8gAToS356R3N9OiS/N8u5mqTE5oFDwABkGUzpNxnF8+TXsuOh2daYmlgmSdye/prbM5kwem4MN+Sbj2tZ04WWtFqk4JTYvCjNH2bxwusbp82pPRzAwhMUhgQLZBhVsn9YVeo4AgNjfgExkMGgV+N2sI7pqa5/v2PCwzHoLIwHFSoqZa7v/t2XvRuVAxso7OtMRSQbKOzjp5tWc2pyfP2sjlPB6dOQRJcQoYHR44PGLU/huHS3uC+o4WCySdQzMzhMSos01OvLPzNHgOUMh48ABUCh4jsuIxIuvct8GWW1y/K6nFf3aehssjQsZzEEXWoWJkocy0xEpBslDye9ozm7PsiyMwaBQ4WWvtsbM2sfJvHC7Uzyn6UDBDSAzzMEjrTqIIDoBCzuFghQmLP9iHWyf1xSUDU307cUb1MWBElh7xagXe230GNSYnAEAha/9FJ9TqrLHQ0LGjFyhRZFhfXIHDFSZolTL/AiuQZnNUch6HK82IV8mQGq/u0UmisfBvHC6xtHzaW1AwQ3qNnjS1HwwDYHJIFWiMdg/+tvk43tl52jcLAMD3zdnlEQEOyNCrccP4XNw0PrfdF51Qv4VHe82QjlygvDkyh8qNaLS7YbQDjTY3UuNV55buwNBkc0FkDAaN0telvCfX2In2f+NwiUTLBdI2amdAeo0DZUZc/cp3kR5Gt0qNV4IxDt6VE0Fk57U1kD54Q5kh6Im1WM5ttxWCXqD+Mn8UAPi25GoUMlQZHQAnFS7kOc6Xi2R3Cfip3gKAQ7/kOKgVUsdnbwdoBqkS82u/GNcrAoCeKBItF3oTamdASBC9MRnP7PAgLyUOx2ssAIDBaTrwvBTZdHaGoCd+C7/QrNPEAcm4ddVuX44MADTaXLC7Rch5KVisNTsRp5LBLQgQRECr5OERRfxUb4fTc257vFLGQy7
je+X7sqfoTUtr0Y6CGRLTOjI70BuT8TwCg9HhAWvud+D0MGhavAyd3Ubqff3rrE40Wd1I1CqQrFPF9Ad6WxeoA2XGgB1PqfFqlDfa4REZeA5wuD0w2twwOz3geQ4aBY+KJgcExiDnOanODwCHWwA8Is422CL7hEmn9MSgPhZFfTDzhz/8AUuXLvW7bciQITh69GiERkSiRbDaHgNS4zBrZCZykrQBwc2wjPgIj7j7CYzB4vRAFAGeBzyiCKlN5TmhVmj1vv6HK4wwOTxSFVieg16twPAsfUxPtbd2gQq240mnkiM7UYNaswNOtwiRATaXgJFZBjTZXDhWbYbIGBR8yzwcaXWf54CNB6s6lLNECAkU9cEMAIwYMQJfffWV72e5PCaGTbpQsFLiTXYXdp1qwM6TDdCp5IhTyvzWrw9UGCM97G7HGGCyuyECYCLz1YdhjPnyNzwig5yD3y6dC8127Sipw5K1+1FnccLhYQCTAhlRZDDa3dhf1nTB3TqxmHPT2o4n6f0Whya7GzanB0/MGY65Y7KwevcZPL3hEMC84QsDY9JylIznkRqvwslaKq5GSGfFRFQgl8uRkZER6WGQKBGstofF6UGtWdo5AgZ4BBFaldJvC+y7u85EeugR4d3ExQDUmhwQRcDidPvyN0TGoNcoYLS7gs52penVmDkiA5cMTPFtNV32xRGUN9khiOe6JDORQc7zEBmDR5BmhHpaxdy2djwBgMMtYkS2AXPHZIHnOeQkaaFTyeERRLgEEUyU8mXUChlS41XQKmSosTgpb4aQToqJYObEiRPIysqCWq3GpEmTsGzZMuTm5gY91+l0wul0+n42mUxBzyOx6/xKrYwx1JqlnAQFz4MB0gwEO9cN+h9bS1FSbY700CPOLQJVJoevv5M3GdXtEfHAmmLIeSn4SdQq4RJE1JicqDI5sL+sCW98q8DwLAMGpulwuNIM1iKQ4SD9nlsQIW+euUhSKIPm4sRyg76ObslN0ioRp5RBq1ICjPPtZFIreXDgYHcLVFyNkDCI+nYGEyZMwFtvvYWNGzdi5cqVOHXqFKZMmQKzOfiFadmyZTAYDL4/OTk53Txi0tXOz1twuEU4PWJzcmVzgiWT8kO8Ca7HqsywuIQIjzx6MEgtEcTm/7e7BDRYXag1uxCnlMEjMlQ2OaTgpDlp1e4ScbjCiHe/P928TNLiDjkpoGGQllB8QdIFKubGYoO+li0ibE4PaixO2JweDMuMDwjEvDM5TTYP1Aoe8WqF1LsIXLtbSBBCLizqZ2Zmz57t+3t+fj4mTJiAvn374v3338cdd9wRcP6SJUuwePFi388mk4kCmh7m/LyFlt2ggXOzDXL+XDdop0eAw+2J4Kijm7fTEgNQ1miHQi4tF8llUkdujjG4RRFJCgUarG7fud4ABpB+4Jg0Q+Od9Tl/1qGnNOhr75ZcKq5GSPeI+mDmfAkJCRg8eDBKSkqCHlepVFCpVN08KtKdzs9bkPO8b7ur1BWaQa2QQa3k4RZEVJscMDs8iN7v+tFFYIDgFsEDEKVG2wAYRJHBLbAWMzDnfqdlMAkAMh6wuwUMy9T7zTqE0v8oWrV3S25v61tESCTEXDBjsVhQWlqKX/ziF5EeComQ87/tGjRyKGW8VLcDgIznEa+W42yDHUa7O8KjjV0iAJev+ZM0C2N3C36zOC21rCWukPHQqeQBsw69tUEfFVcjpGtFfc7MI488gm3btuGnn37Cjh07MH/+fMhkMtx4442RHhqJoJZ5C3aXALmMP7erBgyVRkdAIEOXjc6Rej+dW6qT88FfU4WMw5icxKCJvN5ZtUabG+d3UunpOSTemZxpg1Mxqo+BAhlCwijqZ2bKyspw4403or6+Hqmpqbjkkkvw/fffIzU1NdJDIxHm/ba7o7Qea/eW4cvD1TA7PBAE/4tkik6Jq/Oz8NmBCtSYo3/5IlYwcFDKOd9WbJ4DkuOUeGDG4FaLwFEOCSGkK1CjSRKzDleYsGr7Kaw
vrvAVg2tpeKYeiy7Nw8wRGdheWo8H1+xFo42WncIhOU4Jp0eE0yM0Ly9JReCe/Nlw/Hxi3wv+PjXoI4RcCDWaJD2WIDJ8daQab357Crt/agg4rpBxuGZ0Fn55cX+MzD6XnJmkVcJOW7M7TapPwyFeLUemWg6HS6oizHMczA4PcpK07bofyiEhhIQTBTMkJhjtbqzZfQb/2Xka5U32gOMpOiV+PqEvbp7YF6nxgbvZhqTpgs7ekPbhAXA8B2+sIeelom8apQyATCr+JutY4i416COEhAsFMySqldSY8ca30lKS3R04szI8U487L+mPn43OglLeej77Zwer0LMXVLuWRskD4GBzCdAqZVArzr3W3sTdYZnxPTJxlxAS/SiYIVGHMYYvD1fjrR0/YUdpfcBxngMuG5qGu6YOwPh+SQHF14Ipb7KdV+GNtJeMAxK0Klicbsh4DjKeh8MjUuIuISRqUDBDokajzYn//lCG93afwel6W8DxeLUcCwqzccfF/ZGbHNeh+85O0IIHQFkz7ccBkPMctCo5GGPI75OAqYNS8M2JOir+RgiJKhTMkIgSRIbjVWb85/uf8Mm+SlicgS0H+iZrcdP4XNw4Phd6jSKkx7k6PxNLPzmEpl5WRM87T9LahJRBI4dBo4DIALPdDZmMw+QBKRjXNxGjcxPAcxya7G6/BN07LhlAibuEkKhCwQzpdowxWJwebC+pw+pdZ/BdSR2C9RWcOCAJN0/IxZXDM6BSBFaL7Qi5nMf8wmys2v5Tp+4nlvAckKCRIzdZhyuHpyEtXo3jVWbYPCJ0KhkOlBlxqs4Ku1uqupufk9CuGRZK3CWERBsKZki3cbgF1Ftc+GRfBT4sKkNJjSXgHI1ChlkjM3Dj+FyMzjFAJe9cENPS9CFpPSqYSdQqcN+0AfjvD2X4qcHq65WklPPISdTg2rE5uGRgSqszJ6LIaIaFENIjUDBDupRbEGFxeHC63oq1ReX4ZH9F0MJ1mQY15hdkY35BNnKStFB3ciYmmGjs98OjucO3jG/OT5FhVLYBo7INMKgV+KnBhq+PVqOyyQGBNeexyDgMSdfh8auGY/LAFNw5JQ8Hyo3Ye7YJHAPG5CZgVPaFy+XTDAshpKegYIaEnSgyWFweWBwe7C9rwod7yrD1WC08QdaSCnITsKAgG9OGpCFVp2quW9I1omXbsErBI14lx+0X98fFzUs65+eltPSHq0e0GazwPIfROQkYnZPQ3U+FEEKiAgUzJGzsLgFmhxsmhxvfHK/D2qIyHKwwBZynlPOYMSwNCwqyMTRTj6Q4JbTKrn8rfn8ycJt3d+E5QMZz0KsVGJ6l79DuHwpWCCGkbRTMkE5xeURYnNIsTIPVic8OVGJ9cQVqzM6Ac1N0Sswbk405ozKREq9CYpwSOlX3vAVFkWHlttJueSwvngN+NioT8wqyYbR7kKhVIFmnotwUQggJMwpmSIcJorQbyeL0wOkWcKrOinV7y/Hl4Wo4PcEbPi4szMaUQSnQKOVI0CoQrw5ti3WoDlWYUBok4bgrqOQchmbo8duZQ3DJIOruTgghXY2CGdIujDHYXAIsTg9sLgGCKGLXyQasLSrDnjNNAefLeQ7Th6RiQWE2hmboIed5JMQpEK+St6tib7g12FxwBQm0wknBAw/OGIypg9No9oUQQroRBTOkTQ63FMBYnR4IIoPV6cGmQ1VYt7ciaMPHBI0CV4/OxDWjs5CsU0HGc0jQKKHXRCaI8UrSKs9VkOsCGXoV/n7dGKqCSwghEUDBDAngEaQ8GLPDA3dz8ZLyJjvW7S3HxoNVsLkCmwIMTNVhQWE2LhuaBqWcB89xSNAqoFcromKGYkSWHhl6NeosrrDdp5wHUuNVuHdaHn4xsV9UPE9CCOmNKJghAKQEWatLyoOxNwcrjDHsPdOEj4rK8f3J+oCS+DwHTM5LwcKx2cjPNoDjOPAcB71GgQRNdAQxXjzP4YbxuXji44Oduh+tgsO7d02CyeGhQnO
EEBIlKJjp5ewuAWanGzanAJFJ4YrTLeDLIzVYt7ccp+qsAb8Tp5LhqpGZmF+QjQyDGgDAcRz0ajkStErIovTiflMngxkOwENXDEFBbmL4BkUIIaTTKJjphVpup/aI55Jia81OfFxcjs/2V8LkCGz4mJOowYLCbFw5PMNX3I7jOOhUciRqFZDL+G57DqHozAyKWsHj4SsG466peWEcESGEkHCgYKaXOH87tRdjDIcrTVhbVI5tx2uDNnwc3y8RCwr7YFy/RPAtknh1ajkStUooojyICdWVw1Ihk8lw2dA0zB+TDbm8Zz5PQgiJdRTM9GCMMdjdAswOaTs1Y+ciFbcgYtvxWnxUVI5jVeaA31XLecwckYH5BdnITdb6HdOppOUkZQ+/uP/r1vGRHgIhhJB2oGCmB3J6pADGu526pUabC5/uq8SGfRWotwbu7EnXqzBvTDauGpURUNhOq5QjMU4R1k7WhBBCSGdRMNNDeAQRVqeUzBusOFxJjQUfFZXh66M1cAuBa0n5fQxYUJiNi/NSAhJ4NUoZErXKLulkTQghhHQWBTMxjDEGq0uAxeGBzRWYsCuIDDtK6/FRURn2lxkDjitkHC4bKjV8HJQeH3BcpZAhSavs0k7WhBBCSGdRMBODHO5zy0giC5xlsTg8+PxgJT7eW4EqkyPgeFKcEnNHZ+FnozORqFUGHFfK+W7rZE0IIYR0Fl2tYoRbEGFxSLuRvFV5z3emwYZ1ReXYdLgKDnfgOUPS47GgMBvTh6QG3YGkkPHd2smaEEIICQe6akUxUWSwuKR6MA53YAsBABAZw48/NWJtURl2/9QYcJzngKmDpIaPI7L0QfsjKWR8RDpZE0IIIeFAwUwUsjUHMNbztlO3ZHcJ2HxYavh4psEWcFyvlmNOfibmjs5Cml4d9D4i3cmaEEIICQcKZqKE0yMl8lqdgl9V3vNVmRz4eG85Pj9QBYszMOm3X7IWCwr7YMawtFZ3H0VLJ2tCCCEkHCiYiSBBZLA4PK1up/ZijGF/uRFri8qxvaQuoEovB2DigGQsLMxGQW5CqwFKtHWyJoQQQsKBgplu1nI7td3d+jISIPVQ+vpoDdYWlaOk1hJwXKuUYdbIDMwfk43sRE2r9+PtZG3QKKK2CWR36RPPo8zceuDY8jxCCCGxISaCmVdffRXPPfccqqqqMHr0aLz88ssYPz62Ss1faDt1S/UWJzbsq8An+yrRZHcHHM9KUGNBQTZmjshAXBs7j2Khk3V3+9t1F+H6N3e16zxCCCGxIeqDmf/+979YvHgx/vnPf2LChAlYsWIFZs6ciWPHjiEtLS3Sw2uTWxBhdXpgdrS+nbqlo1VSw8etx2rhCdLxsTA3AQsL+2DCgCS/ho/ni6VO1t3torxkaBQ87EG2rntpFDwuykvuxlERQgjpDI61tc4RBSZMmICLLroIr7zyCgBAFEXk5OTg/vvvx2OPPXbB3zeZTDAYDDAajdDr9V09XIgig9UlBTCtbaduySOI+K6kDh8VleNQhSnguFLO44ph6VhQmI3+KXEXvL+e3sk6HHaU1OHWf++GO0jAqOA5vH37eEwemBKBkRFCCPHqyPU7qmdmXC4X9uzZgyVLlvhu43keM2bMwM6dO4P+jtPphNPp9P1sMgUGCF3B7pL6IlmdbefBeBntbny2vxLriytQa3EGHE/RKTFvTDbmjMqEQXvh+i9xKimI6emdrMNh8sAUvH37eLy85Tj2nG2CR2CQyziMzUnA/ZcPpkCGEEJiTFQHM3V1dRAEAenp6X63p6en4+jRo0F/Z9myZVi6dGl3DA8eQYTJIdWEaWs7dUun6qxYW1SOr45UwxlkB9PwTD0WFmZjyqCUdi0RUSfr0EwemIKJA5JxqMKEBpsLSVolRmTpaZcXIYTEoKgOZkKxZMkSLF682PezyWRCTk5OlzyW1SWgyea64HkiY/j+ZD3WFpWj6ExTwHE5z2H6EKlK79CM9i2FUSfrzuN5DqP6GCI9DEI
IIZ0U1cFMSkoKZDIZqqur/W6vrq5GRkZG0N9RqVRQqVTdMbwLsjo92HioCuv2lqOiKbDhY4JGgatHZ+Ka0VlI1rVvzNTJmhBCCPEX1cGMUqnE2LFjsWXLFsybNw+AlAC8ZcsW/PrXv47s4NpQ3mjHur3l2HioCjZXYBLwwFQdFo7NxqVD0tqd40KdrAkhhJDgov7KuHjxYtx6660YN24cxo8fjxUrVsBqteKXv/xlpIfmhzGGvWea8GFRGXadbMD5KcA8B1w8MAULCrORn21odxsB6mRNCCGEtC3qr5DXX389amtr8dRTT6GqqgpjxozBxo0bA5KCI8XhFvDVkRqs21uOU3XWgOM6lRxXjcrAvDHZyDAEb/gYDHWyJoQQQton6uvMdFZX1ZmpNNrx+jen8OGeszA5Ahs+5iZpMb8gG1eOSIemA0m6cp6HQauAXk1NIAkhhPRePabOTLTaXlKHW/69G0KQomvj+ydhYWE2xvZNbLNK7/mokzUhhBASGgpmQlCYm4h4tRxNNqlvklrBY+aIDMwvyEZukrZD98VzHAzNTSCpxgkhhBDScRTMhECjlOHG8blYX1yOa0Zn4aqRmdCpO/ZSUidrQgghJDwoZyZENpcHDrfYrqJ5LVEna0IIIeTCKGemG2iVcrgFd7vPp07WhBBCSNegYKYbUCdrQgghpOtQMNOFqJM1IYQQ0vUomOkC1MmaEEII6T4UzISRWiFDUhx1siaEEEK6EwUzYUCdrAkhhJDIoWCmE5QyHul6NeKoCSQhhBASMXQV7gSaiSGEEEIij7bZEEIIISSmUTBDCCGEkJhGwQwhhBBCYhoFM4QQQgiJaRTMEEIIISSmUTBDCCGEkJhGwQwhhBBCYhoFM4QQQgiJaRTMEEIIISSmUTBDCCGEkJhGwQwhhBBCYhoFM4QQQgiJaRTMEEIIISSmUTBDCCGEkJhGwQwhhBBCYpo80gPoaowxAIDJZIrwSAghhBDSXt7rtvc63pYeH8yYzWYAQE5OToRHQgghhJCOMpvNMBgMbZ7DsfaEPDFMFEVUVFQgPj4eHMdFejgRZzKZkJOTg7Nnz0Kv10d6OBFHr4c/ej0C0Wvij14Pf/R6+Avn68EYg9lsRlZWFni+7ayYHj8zw/M8+vTpE+lhRB29Xk//4bVAr4c/ej0C0Wvij14Pf/R6+AvX63GhGRkvSgAmhBBCSEyjYIYQQgghMY2CmV5GpVLh6aefhkqlivRQogK9Hv7o9QhEr4k/ej380evhL1KvR49PACaEEEJIz0YzM4QQQgiJaRTMEEIIISSmUTBDCCGEkJhGwQwhhBBCYhoFMz1MQ0MDbr75Zuj1eiQkJOCOO+6AxWJp8/z7778fQ4YMgUajQW5uLh544AEYjUa/8ziOC/izZs2arn46IXn11VfRr18/qNVqTJgwAbt3727z/A8++ABDhw6FWq3GqFGj8Pnnn/sdZ4zhqaeeQmZmJjQaDWbMmIETJ0505VMIq468Hq+//jqmTJmCxMREJCYmYsaMGQHn33bbbQHvhVmzZnX10wibjrweb731VsBzVavVfuf0pvfH9OnTg34WzJkzx3dOLL8/vvnmG1x99dXIysoCx3H4+OOPL/g7W7duRWFhIVQqFQYOHIi33nor4JyOfiZFk46+JmvXrsUVV1yB1NRU6PV6TJo0CZs2bfI75w9/+EPAe2To0KGdGygjPcqsWbPY6NGj2ffff8++/fZbNnDgQHbjjTe2ev6BAwfYggUL2IYNG1hJSQnbsmULGzRoEFu4cKHfeQDYqlWrWGVlpe+P3W7v6qfTYWvWrGFKpZL9+9//ZocOHWJ33XUXS0hIYNXV1UHP3759O5PJZOyvf/0rO3z4MHviiSeYQqFgBw4c8J2zfPlyZjAY2Mcff8z27dvHrrnmGta/f/+ofP7n6+jrcdNNN7FXX32V7d27lx05coTddtttzGAwsLKyMt85t956K5s1a5bfe6GhoaG7nlKndPT1WLVqFdP
r9X7Ptaqqyu+c3vT+qK+v93stDh48yGQyGVu1apXvnFh+f3z++efs97//PVu7di0DwNatW9fm+SdPnmRarZYtXryYHT58mL388stMJpOxjRs3+s7p6GscbTr6mvzmN79hzz77LNu9ezc7fvw4W7JkCVMoFKyoqMh3ztNPP81GjBjh9x6pra3t1DgpmOlBDh8+zACwH374wXfbF198wTiOY+Xl5e2+n/fff58plUrmdrt9t7XnTRwNxo8fzxYtWuT7WRAElpWVxZYtWxb0/Ouuu47NmTPH77YJEyawe+65hzHGmCiKLCMjgz333HO+401NTUylUrH33nuvC55BeHX09Tifx+Nh8fHx7O233/bdduutt7K5c+eGe6jdoqOvx6pVq5jBYGj1/nr7++OFF15g8fHxzGKx+G6L5fdHS+35zHv00UfZiBEj/G67/vrr2cyZM30/d/Y1jiahXgeGDx/Oli5d6vv56aefZqNHjw7fwBhjtMzUg+zcuRMJCQkYN26c77YZM2aA53ns2rWr3fdjNBqh1+shl/u37lq0aBFSUlIwfvx4/Pvf/25XW/bu5HK5sGfPHsyYMcN3G8/zmDFjBnbu3Bn0d3bu3Ol3PgDMnDnTd/6pU6dQVVXld47BYMCECRNavc9oEcrrcT6bzQa3242kpCS/27du3Yq0tDQMGTIE9913H+rr68M69q4Q6uthsVjQt29f5OTkYO7cuTh06JDvWG9/f7z55pu44YYbEBcX53d7LL4/QnGhz49wvMaxThRFmM3mgM+QEydOICsrCwMGDMDNN9+MM2fOdOpxKJjpQaqqqpCWluZ3m1wuR1JSEqqqqtp1H3V1dfjTn/6Eu+++2+/2P/7xj3j//ffx5ZdfYuHChfjVr36Fl19+OWxjD4e6ujoIgoD09HS/29PT01t9/lVVVW2e7/3/jtxntAjl9Tjf7373O2RlZfl9GM+aNQv/+c9/sGXLFjz77LPYtm0bZs+eDUEQwjr+cAvl9RgyZAj+/e9/Y/369fjf//1fiKKIyZMno6ysDEDvfn/s3r0bBw8exJ133ul3e6y+P0LR2ueHyWSC3W4Py3+Dse7555+HxWLBdddd57ttwoQJeOutt7Bx40asXLkSp06dwpQpU2A2m0N+nB7fNbsneOyxx/Dss8+2ec6RI0c6/Tgmkwlz5szB8OHD8Yc//MHv2JNPPun7e0FBAaxWK5577jk88MADnX5cEp2WL1+ONWvWYOvWrX5JrzfccIPv76NGjUJ+fj7y8vKwdetWXH755ZEYapeZNGkSJk2a5Pt58uTJGDZsGF577TX86U9/iuDIIu/NN9/EqFGjMH78eL/be9P7g7Rt9erVWLp0KdavX+/3RXv27Nm+v+fn52PChAno27cv3n//fdxxxx0hPRbNzMSAhx9+GEeOHGnzz4ABA5CRkYGamhq/3/V4PGhoaEBGRkabj2E2mzFr1izEx8dj3bp1UCgUbZ4/YcIElJWVwel0dvr5hUtKSgpkMhmqq6v9bq+urm71+WdkZLR5vvf/O3Kf0SKU18Pr+eefx/Lly7F582bk5+e3ee6AAQOQkpKCkpKSTo+5K3Xm9fBSKBQoKCjwPdfe+v6wWq1Ys2ZNuy48sfL+CEVrnx96vR4ajSYs77lYtWbNGtx55514//33A5bizpeQkIDBgwd36j1CwUwMSE1NxdChQ9v8o1QqMWnSJDQ1NWHPnj2+3/36668hiiImTJjQ6v2bTCZceeWVUCqV2LBhQ8DW02CKi4uRmJgYVc3VlEolxo4diy1btvhuE0URW7Zs8ft23dKkSZP8zgeAL7/80nd+//79kZGR4XeOyWTCrl27Wr3PaBHK6wEAf/3rX/GnP/0JGzdu9Mu/ak1ZWRnq6+uRmZkZlnF3lVBfj5YEQcCBAwd8z7U3vj8AqZyB0+nEz3/+8ws+Tqy8P0Jxoc+PcLznYtF7772HX/7yl3jvvff8tu23xmKxoLS0tHPvkbCmE5OImzVrFis
oKGC7du1i3333HRs0aJDf1uyysjI2ZMgQtmvXLsYYY0ajkU2YMIGNGjWKlZSU+G2V83g8jDHGNmzYwF5//XV24MABduLECfaPf/yDabVa9tRTT0XkObZlzZo1TKVSsbfeeosdPnyY3X333SwhIcG3nfYXv/gFe+yxx3znb9++ncnlcvb888+zI0eOsKeffjro1uyEhAS2fv16tn//fjZ37tyY2nrbkddj+fLlTKlUsg8//NDvvWA2mxljjJnNZvbII4+wnTt3slOnTrGvvvqKFRYWskGDBjGHwxGR59gRHX09li5dyjZt2sRKS0vZnj172A033MDUajU7dOiQ75ze9P7wuuSSS9j1118fcHusvz/MZjPbu3cv27t3LwPA/v73v7O9e/ey06dPM8YYe+yxx9gvfvEL3/nerdm//e1v2ZEjR9irr74adGt2W69xtOvoa/Luu+8yuVzOXn31Vb/PkKamJt85Dz/8MNu6dSs7deoU2759O5sxYwZLSUlhNTU1IY+Tgpkepr6+nt14441Mp9MxvV7PfvnLX/ouRIwxdurUKQaA/d///R9jjLH/+7//YwCC/jl16hRjTNrePWbMGKbT6VhcXBwbPXo0++c//8kEQYjAM7ywl19+meXm5jKlUsnGjx/Pvv/+e9+xadOmsVtvvdXv/Pfff58NHjyYKZVKNmLECPbZZ5/5HRdFkT355JMsPT2dqVQqdvnll7Njx451x1MJi468Hn379g36Xnj66acZY4zZbDZ25ZVXstTUVKZQKFjfvn3ZXXfdFTMfzIx17PV48MEHfeemp6ezq666yq9eBmO96/3BGGNHjx5lANjmzZsD7ivW3x+tfR56X4Nbb72VTZs2LeB3xowZw5RKJRswYIBfzR2vtl7jaNfR12TatGltns+YtH09MzOTKZVKlp2dza6//npWUlLSqXFyjEXZ/lpCCCGEkA6gnBlCCCGExDQKZgghhBAS0yiYIYQQQkhMo2CGEEIIITGNghlCCCGExDQKZgghhBAS0yiYIYQQQkhMo2CGEEIIIR32zTff4Oqrr0ZWVhY4jsPHH3/c4ftgjOH555/H4MGDoVKpkJ2djT//+c8dvh8KZgghPdr27dsxatQoKBQKzJs3D1u3bgXHcWhqaor00Hz69euHFStWRHoYhHSI1WrF6NGj8eqrr4Z8H7/5zW/wxhtv4Pnnn8fRo0exYcOGgE7s7SEPeQSEEBIDFi9ejDFjxuCLL76ATqeDVqtFZWUlDAZDpIdGSEybPXs2Zs+e3epxp9OJ3//+93jvvffQ1NSEkSNH4tlnn8X06dMBAEeOHMHKlStx8OBBDBkyBIDUvDUUNDNDCOnRSktLcdlll6FPnz5ISEiAUqlERkYGOI4Ler4gCBBFsZtHSUjP8+tf/xo7d+7EmjVrsH//fvzP//wPZs2ahRMnTgAAPvnkEwwYMACffvop+vfvj379+uHOO+9EQ0NDhx+LghlCepnp06fjgQcewKOPPoqkpCRkZGTgD3/4g+94U1MT7rzzTqSmpkKv1+Oyyy7Dvn37AABGoxEymQw//vgjAEAURSQlJWHixIm+3//f//1f5OTktGssZWVluPHGG5GUlIS4uDiMGzcOu3bt8h1fuXIl8vLyoFQqMWTIELzzzjt+v89xHN544w3Mnz8fWq0WgwYNwoYNGwAAP/30EziOQ319PW6//XZwHIe33norYJnprbfeQkJCAjZs2IDhw4dDpVLhzJkz6NevH5555hnccsst0Ol06Nu3LzZs2IDa2lrMnTsXOp0O+fn5vtfC67vvvsOUKVOg0WiQk5ODBx54AFar1Xe8pqYGV199NTQaDfr374933323Xa8VIbHkzJkzWLVqFT744ANMmTIFeXl5eOSRR3DJJZdg1apVAICTJ0/i9OnT+OCDD/Cf//wHb731Fvbs2YNrr7224w/YqTaVhJCYM23aNKbX69kf/vAHdvz4cfb2228zjuN8XZBnzJjBrr76avbDDz+
w48ePs4cffpglJyez+vp6xhhjhYWF7LnnnmOMMVZcXMySkpKYUqn0dWe/88472c0333zBcZjNZjZgwAA2ZcoU9u2337ITJ06w//73v2zHjh2MMcbWrl3LFAoFe/XVV9mxY8fY3/72NyaTydjXX3/tuw8ArE+fPmz16tXsxIkT7IEHHmA6nY7V19czj8fDKisrmV6vZytWrGCVlZXMZrP5ugA3NjYyxhhbtWoVUygUbPLkyWz79u3s6NGjzGq1sr59+7KkpCT2z3/+kx0/fpzdd999TK/Xs1mzZrH333+fHTt2jM2bN48NGzaMiaLIGGOspKSExcXFsRdeeIEdP36cbd++nRUUFLDbbrvNN+bZs2ez0aNHs507d7Iff/yRTZ48mWk0GvbCCy907h+WkAgCwNatW+f7+dNPP2UAWFxcnN8fuVzOrrvuOsYYY3fddRcD4Ndlfs+ePQwAO3r0aMcePyzPghASM6ZNm8YuueQSv9suuugi9rvf/Y59++23TK/XM4fD4Xc8Ly+Pvfbaa4wxxhYvXszmzJnDGGNsxYoV7Prrr2ejR49mX3zxBWOMsYEDB7J//etfFxzHa6+9xuLj431B0vkmT57M7rrrLr/b/ud//oddddVVvp8BsCeeeML3s8ViYQB8Y2GMMYPBwFatWuX7OVgwA4AVFxf7PVbfvn3Zz3/+c9/PlZWVDAB78sknfbft3LmTAWCVlZWMMcbuuOMOdvfdd/vdz7fffst4nmd2u50dO3aMAWC7d+/2HT9y5AgDQMEMiWnnBzNr1qxhMpmMHT16lJ04ccLvj/e/l6eeeorJ5XK/+7HZbAyA78tVe1ECMCG9UH5+vt/PmZmZqKmpwb59+2CxWJCcnOx33G63o7S0FAAwbdo0vPnmmxAEAdu2bcOVV16JjIwMbN26Ffn5+SgpKfEl+LWluLgYBQUFSEpKCnr8yJEjuPvuu/1uu/jii/Hiiy+2+lzi4uKg1+tRU1NzwcdvSalUBrwm5993eno6AGDUqFEBt9XU1CAjIwP79u3D/v37/ZaOGGMQRRGnTp3C8ePHIZfLMXbsWN/xoUOHIiEhoUPjJSTaFRQUQBAE1NTUYMqUKUHPufjii+HxeFBaWoq8vDwAwPHjxwEAffv27dDjUTBDSC+kUCj8fuY4DqIowmKxIDMzE1u3bg34He8Fd+rUqTCbzSgqKsI333yDv/zlL8jIyMDy5csxevRoZGVlYdCgQRccg0ajCcdTafW5dIRGowmaENzyvr3Hg93mfTyLxYJ77rkHDzzwQMB95ebm+j6oCekJLBYLSkpKfD+fOnUKxcXFSEpKwuDBg3HzzTfjlltuwd/+9jcUFBSgtrYWW7ZsQX5+PubMmYMZM2agsLAQt99+O1asWAFRFLFo0SJcccUVGDx4cIfGQgnAhBCfwsJCVFVVQS6XY+DAgX5/UlJSAEhBTX5+Pl555RUoFAoMHToUU6dOxd69e/Hpp59i2rRp7Xqs/Px8FBcXt7pzYdiwYdi+fbvfbdu3b8fw4cM79yS7UGFhIQ4fPhzw2g0cOBBKpRJDhw6Fx+PBnj17fL9z7NixqKp5Q0h7/fjjjygoKEBBQQEAqQxCQUEBnnrqKQDAqlWrcMstt+Dhhx/GkCFDMG/ePPzwww/Izc0FAPA8j08++QQpKSmYOnUq5syZg2HDhmHNmjUdHgvNzBBCfGbMmIFJkyZh3rx5+Otf/4rBgwejoqICn332GebPn49x48YBkHZEvfzyy75dB0lJSRg2bBj++9//truA1o033oi//OUvmDdvHpYtW4bMzEzs3bsXWVlZmDRpEn7729/iuuuuQ0FBAWbMmIFPPvkEa9euxVdffdVlz7+zfve732HixIn49a9/jTvvvBNxcXE4fPgwvvzyS7zyyisYMmQIZs2ahXvuuQcrV66EXC7Hgw8+GLZZKkK60/Tp0yGlywSnUCiwdOlSLF26tNVzsrKy8NFHH3V6LDQzQwjx4TgOn3/+OaZOnYp
f/vKXGDx4MG644QacPn3alx8CSHkzgiD45cZMnz494La2KJVKbN68GWlpabjqqqswatQoLF++HDKZDAAwb948vPjii3j++ecxYsQIvPbaa1i1alW77z8S8vPzsW3bNhw/fhxTpkzxfUvNysrynbNq1SpkZWVh2rRpWLBgAe6++26kpaVFcNSExD6OtRVWEUIIIYREOZqZIYQQQkhMo2CGENIl/vKXv0Cn0wX901Y/F0II6ShaZiKEdImGhoZWdyppNBpkZ2d384gIIT0VBTOEEEIIiWm0zEQIIYSQmEbBDCGEEEJiGgUzhBBCCIlpFMwQQgghJKZRMEMIIYSQmEbBDCGEEEJiGgUzhBBCCIlp/w8OnJknZSEmoQAAAABJRU5ErkJggg==", "text/plain": [ "
" ] @@ -606,7 +597,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -642,7 +633,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAGxCAYAAACXwjeMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAABXtklEQVR4nO3deXwU9f0/8NfM7JVNshuSEJJAuMIVMUAURUAOK+XQrwraKmqrqKClqPUHtgoeiPYr3tKqpX61hWqrVFtRahUPWlABD045IpAYuZIQSMhuNpvsNZ/fH5tdsuTabDbZTPJ6Ph4r7MxnZ987jDvv/ZySEEKAiIiISKPkWAdARERE1BZMZoiIiEjTmMwQERGRpjGZISIiIk1jMkNERESaxmSGiIiINI3JDBEREWkakxkiIiLSNF2sA2hvqqqiuLgYiYmJkCQp1uEQERFRGIQQqKqqQmZmJmS5+bqXLp/MFBcXIysrK9ZhEBERUQSOHj2KPn36NFumyycziYmJAPwnw2KxxDgaIiIiCofdbkdWVlbwPt6cLp/MBJqWLBYLkxkiIiKNCaeLCDsAExERkaYxmSEiIiJNYzJDREREmsZkhoiIiDSNyQwRERFpGpMZIiIi0jQmM0RERKRpTGaIiIhI05jMEBERkaZ1+RmAiZqiqgL7iu2ocLqRbDZgeKYFsszFSImItIbJDHVLWwpOYeWmQhSWOeDxCegVCdlpCZg/KRvjBqXGOjwiImoFNjNRt7Ol4BSWrN2D/BI7FFlCnEGGIkvIL7Fjydo92FJwKtYhEhFRK7BmhroVVRVYuakQp51ueH0qKp0eCAFIEmDUSfD4VKzcVIiLBqawyYmISCNYM0Pdyr5iO/YX2+Go9cLpVuFVBXxCwKsKON0qHLVe7C+2Y1+xPdahEhFRmFgzQ91KucMFW40HPuF/Xr/uRQDwCcBW40G5wxWL8IiIKAKsmaFupbzaDa/qz2SkwH/qHoHExqsKlFe7YxMgERG1GpMZ6lZstZ7g38VZ+0QT5YiIqHNjMxN1KwokSDiTuIizMxr4a2gUsPMvEZFWsGaGupVRfZNg0MmQAcj1mpakuucyAINOxqi+STGLkYiIWofJDHUrub2tGNIrAYHqGZ0iQa9I0CmSv7pGAob0SkBub2usQyUiojAxmaFuRZYlLJ6Rg56JRiiKBCEEVFVACAFFkZCWaMTiGTmcY4aISEOYzFC3M25QKp6/dhTGDEhBD7MRCSY9epiNGDMgBc9dO4rLGRARaUxMk5nly5fjggsuQGJiItLS0jBz5kwcOHAgpMzkyZMhSVLI4xe/+EWMIqauYtygVKyecwGWXJaDeRMGYMllOVg95wImMkREGhTT0UybNm3CggULcMEFF8Dr9WLJkiWYOnUq9u/fj/j4+GC5efPm4dFHHw0+N5vNsQiXupDGFpp8Z+cxLjRJRKRBMU1m1q9fH/J89erVSEtLw/bt2zFx4sTgdrPZjPT09I4Oj7qowEKTDpcXPcwGGBQZbp+K/JIqLFm7B4/PymVCQ0SkIZ2qz4zNZgMAJCcnh2z/29/+htTUVJx77rlYvHgxnE5nLMKjLiCw0KTD5UW6xQSTXoEsSzDpFaRbjHC4fFi5qRCq2sgENERE1Cl1mknzVFXFPffcg/Hjx+Pcc88Nbr/hhhvQr18/ZGZm4ttvv8V9992HAwcO4J133mn0OC6XCy7XmXV17HYuGEhn7Cu2o7DMgR5mAyQpdMSSJElIMutRWObAvmI7cvtweDYRkRZ0mmRmwYIF2Lt3L7744ouQ7bfffnvw77m5ucjIyMCll16KwsJCZGdnNzjO8uXLsWzZsnaPl7SpwumGxydgUBqvlDQqMmyqQIWTazMREWlFp2hmuvPOO/H+++/jv//9L/r06dNs2TFjxgAACgoKGt2/ePFi2Gy24OPo0aNRj5e0K9lsgF6R4Papj
e53+VToZQnJZkMHR0ZERJGKac2MEAJ33XUX1q5di40bN2LAgAEtvmbXrl0AgIyMjEb3G41GGI3GaIZJXcjwTAuy0xKQX1KFdIsc0tQkhECl04OcjEQMz7TEMEoiImqNmNbMLFiwAH/961/xxhtvIDExEaWlpSgtLUVNTQ0AoLCwEI899hi2b9+OH374AevWrcNNN92EiRMnYsSIEbEMnTRKliXMn5SNBKOCUrsLNR4fVFWgxuNDqd2FBKOC+ZOyOQMwEZGGSEI0tm5wB7251PgNY9WqVZgzZw6OHj2Kn/3sZ9i7dy+qq6uRlZWFWbNm4cEHH4TFEt4vZ7vdDqvVCpvNFvZrqOsLmWdGFdDLErLTEjjPDBFRJ9Ga+3dMk5mOwGSGmqKqAvuK7ahwupFsNmB4poU1MkREnURr7t+dZjQTUUeTZYnDr4mIuoBOMZqJiIiIKFJMZoiIiEjTmMwQERGRpjGZISIiIk1jMkNERESaxmSGiIiINI3JDBEREWkakxkiIiLSNCYzREREpGlMZoiIiEjTmMwQERGRpjGZISIiIk1jMkNERESaxmSGiIiINE0X6wCIYkVVBfYV21HhdCPZbMDwTAtkWYp1WERE1EpMZqhb2lJwCis3FaKwzAGPT0CvSMhOS8D8SdkYNyg11uEREVErsJmJup0tBaewZO0e5JfYEW/UIS3RiHijDvklVViydg+2FJyKdYhERNQKTGaoW1FVgZWbCuFweZFuMcGkVyDLEkx6BekWIxwuH1ZuKoSqiliHSkREYWIyQ93KvmI7Cssc6GE2QJJC+8dIkoQksx6FZQ7sK7bHKEIiImotJjPUrVQ43fD4BAxK45e+UZHhUQUqnO4OjoyIiCLFZIa6lWSzAXpFgtunNrrf5VOhlyUkmw0dHBkREUWKyQx1K8MzLchOS8BppwdChPaLEUKg0ulBdloChmdaYhQhERG1FpMZ6lZkWcL8SdlIMCootbtQ4/FBVQVqPD6U2l1IMCqYPymb880QEWkIkxnqdsYNSsXjs3KRk5EIp8uLMocLTpcXORmJeHxWLueZISLSGE6aR93SuEGpuGhgCmcAJiLqApjMULclyxJy+1hjHQYREbURm5mIiIhI05jMEBERkaYxmSEiIiJNYzJDREREmsZkhoiIiDSNyQwRERFpGpMZIiIi0jQmM0RERKRpnDSPui1VFZwBmIioC2AyQ93SloJTWLmpEIVlDnh8AnpFQnZaAuZPyubaTEREGsNmJup2thScwpK1e5BfYke8UYe0RCPijTrkl1Rhydo92FJwKtYhEhFRKzCZoW5FVQVWbiqEw+VFusUEk16BLEsw6RWkW4xwuHxYuakQqipiHSoREYWJyQx1K/uK7Sgsc6CH2QBJCu0fI0kSksx6FJY5sK/YHqMIiYiotZjMULdS4XTD4xMwKI1f+kZFhkcVqHC6OzgyIiKKFJMZ6laSzQboFQlun9rofpdPhV6WkGw2dHBkREQUKSYz1K0Mz7QgOy0Bp50eCBHaL0YIgUqnB9lpCRieaYlRhERE1FpMZqhbkWUJ8ydlI8GooNTuQo3HB1UVqPH4UGp3IcGoYP6kbM43Q0SkIUxmqNsZNygVj8/KRU5GIpwuL8ocLjhdXuRkJOLxWbmcZ4aISGM4aR51S+MGpeKigSmcAZiIqAtgMkPdlixLyO1jjXUYRETURmxmIiIiIk1jMkNERESaxmYm6ra4ajYRUdfAZIa6Ja6aTUTUdbCZibodrppNRNS1MJmhboWrZhMRdT1MZqhbqb9qNgDUuH2oqvWgxu0DAK6aTUSkQewzQ91KYNVst1dFia0GLq8KIQBJAow6GSnxRq6aTUSkMTGtmVm+fDkuuOACJCYmIi0tDTNnzsSBAwdCytTW1mLBggVISUlBQkICrrnmGpw4cSJGEZPWJZsNUIWKYlsNajwqZEmCTpYgSxJqPP7tqqpy1WwiIg2JaTKzadMmLFiwAF9++SU++eQTeDweTJ06FdXV1
cEy/+///T/861//wttvv41NmzahuLgYV199dQyjJi3LSU+ETwBen4BOBmRJgiRJdUmNf7tP+MsREZE2xLSZaf369SHPV69ejbS0NGzfvh0TJ06EzWbDn/70J7zxxhv40Y9+BABYtWoVcnJy8OWXX+Kiiy6KRdikYfmlVVAkCYoswacCkAUkCRAC8KmAIktQJAn5pVVc6oCISCM6VQdgm80GAEhOTgYAbN++HR6PB1OmTAmWGTZsGPr27YutW7c2egyXywW73R7yIAqocLohSxJ694iDSa9AFQJen4AqBEx6Bb17xEGWJfaZISLSkE7TAVhVVdxzzz0YP348zj33XABAaWkpDAYDkpKSQsr26tULpaWljR5n+fLlWLZsWXuHSxqVbDZAr0gwKDL6p5pR61bhVVXoZBkmg4xajwq9zD4zRERa0mlqZhYsWIC9e/dizZo1bTrO4sWLYbPZgo+jR49GKULqCoZnWpCdloDTTg8ggDiDgkSTHnEGBRBApdOD7LQEDM+0xDpUIiIKU6dIZu688068//77+O9//4s+ffoEt6enp8PtdqOysjKk/IkTJ5Cent7osYxGIywWS8iDKECWJcyflI0Eo4JSuws1Hh9UVaDG40Op3YUEo4L5k7K5RhMRkYbENJkRQuDOO+/E2rVr8Z///AcDBgwI2X/++edDr9djw4YNwW0HDhzAkSNHMHbs2I4Ol7qIcYNS8fisXORkJMLp8qLM4YLT5UVORiIen5XLtZmIiDQmpn1mFixYgDfeeAPvvfceEhMTg/1grFYr4uLiYLVacdttt2HhwoVITk6GxWLBXXfdhbFjx3IkE7XJuEGpuGhgClfNJiLqAiQhRMwWoZGkxm8cq1atwpw5cwD4J81btGgR3nzzTbhcLkybNg1/+MMfmmxmOpvdbofVaoXNZmOTExERkUa05v4d02SmIzCZISIi0p7W3L87RQdgIiIiokgxmSEiIiJNYzJDREREmsZkhoiIiDSNyQwRERFpGpMZIiIi0jQmM0RERKRpTGaIiIhI05jMEBERkaYxmSEiIiJNYzJDREREmhZxMlNZWYlXX30VixcvRkVFBQBgx44dOH78eNSCIyIiImqJLpIXffvtt5gyZQqsVit++OEHzJs3D8nJyXjnnXdw5MgRvPbaa9GOk4iIiKhREdXMLFy4EHPmzMGhQ4dgMpmC2y+77DJ89tlnUQuOiIiIqCURJTPffPMN7rjjjgbbe/fujdLS0jYHRURERBSuiJIZo9EIu93eYPvBgwfRs2fPNgdFREREFK6Ikpkrr7wSjz76KDweDwBAkiQcOXIE9913H6655pqoBkhERETUnIiSmWeffRYOhwNpaWmoqanBpEmTMGjQICQmJuJ///d/ox0jERERUZMiGs1ktVrxySefYPPmzdi9ezccDgfOO+88TJkyJdrxERERETUromQmYPz48Rg/fjwA/7wzRERERB0tomamJ598En//+9+Dz6+99lqkpKSgd+/e2L17d9SCIyIiImpJRMnMH//4R2RlZQEAPvnkE3zyySf48MMPMWPGDPz617+OaoBEREREzYmomam0tDSYzLz//vu49tprMXXqVPTv3x9jxoyJaoBEREREzYmoZqZHjx44evQoAGD9+vXBjr9CCPh8vuhFR0RERNSCiGpmrr76atxwww0YPHgwysvLMWPGDADAzp07MWjQoKgGSERERNSciJKZ559/Hv3798fRo0fx1FNPISEhAQBQUlKCX/7yl1ENkIiIiKg5khBCxDqI9mS322G1WmGz2WCxWGIdDhEREYWhNffvsGtm1q1bhxkzZkCv12PdunXNlr3yyivDPSwRERFRm4RdMyPLMkpLS5GWlgZZbrrfsCRJnaoTMGtmiIiItKddamZUVW3070RERESxFNHQbCIiIqLOIqLRTI8++miz+x9++OGIgiEiIiJqrYiSmbVr14Y893g8KCoqgk6nQ3Z2NpMZIiIi6jARJTM7d+5ssM1ut
2POnDmYNWtWm4MiIiIiClfU+sxYLBYsW7YMDz30ULQOSURERNSiqHYAttlssNls0TwkERERUbMiamb6/e9/H/JcCIGSkhK8/vrrwXWaiIiIiDpCxGsz1SfLMnr27Imbb74ZixcvjkpgREREROGIKJkpKiqKdhxEREREEWl1nxmPxwOdToe9e/e2RzxERERErdLqZEav16Nv376dav0lIiIi6r4iGs30wAMPYMmSJaioqIh2PEREREStElGfmRdffBEFBQXIzMxEv379EB8fH7J/x44dUQmOiIiIqCURJTMzZ86MchhEREREkZGEECLWQbQnu90Oq9UKm80Gi8US63CIiIgoDK25f0dUMxOwbds25OfnAwDOOeccnH/++W05HBEREVGrRZTMHDt2DNdffz02b96MpKQkAEBlZSXGjRuHNWvWoE+fPtGMkYiIiKhJEY1mmjt3LjweD/Lz81FRUYGKigrk5+dDVVXMnTs32jESERERNSmiPjNxcXHYsmUL8vLyQrZv374dEyZMgNPpjFqAbcU+M0RERNrTmvt3RDUzWVlZ8Hg8Dbb7fD5kZmZGckgiIiKiiESUzDz99NO46667sG3btuC2bdu24Ve/+hWeeeaZqAVHRERE1JKwm5l69OgBSZKCz6urq+H1eqHT+fsQB/4eHx/fqWYGZjMTERGR9rTL0OwVK1a0NS4iIiKiqAs7mbn55ptbffAnnngCv/jFL4LDt4mIiIiiLaI+M+F6/PHHm21y+uyzz3DFFVcgMzMTkiTh3XffDdk/Z84cSJIU8pg+fXp7hkxEREQa067JTEvdcaqrqzFy5Ei89NJLTZaZPn06SkpKgo8333wz2mESERGRhrVpOYO2mjFjBmbMmNFsGaPRiPT09A6KiIiIiLSmXWtmomHjxo1IS0vD0KFDMX/+fJSXl8c6JCIiIupEYloz05Lp06fj6quvxoABA1BYWIglS5ZgxowZ2Lp1KxRFafQ1LpcLLpcr+Nxut3dUuERERBQDnTqZmT17dvDvubm5GDFiBLKzs7Fx40Zceumljb5m+fLlWLZsWUeFSERERDHWrs1MEyZMQFxcXNSON3DgQKSmpqKgoKDJMosXL4bNZgs+jh49GrX3JyIios4nopqZHTt2QK/XIzc3FwDw3nvvYdWqVTjnnHPwyCOPwGAwAAA++OCD6EUK4NixYygvL0dGRkaTZYxGI4xGY1Tfl4iIiDqviGpm7rjjDhw8eBAA8P3332P27Nkwm814++238Zvf/Cbs4zgcDuzatQu7du0CABQVFWHXrl04cuQIHA4Hfv3rX+PLL7/EDz/8gA0bNuCqq67CoEGDMG3atEjCJiIioi4oomTm4MGDGDVqFADg7bffxsSJE/HGG29g9erV+Oc//xn2cbZt24a8vDzk5eUBABYuXIi8vDw8/PDDUBQF3377La688koMGTIEt912G84//3x8/vnnrHkhIiKioIiamYQQUFUVAPDpp5/if/7nfwAAWVlZOHXqVNjHmTx5crMT63300UeRhEdERETdSEQ1M6NHj8Zvf/tbvP7669i0aRMuv/xyAP5mol69ekU1QCIiIqLmRJTMrFixAjt27MCdd96JBx54AIMGDQIA/OMf/8C4ceOiGiARERFRcyTR0gJKrVBbWwtFUaDX66N1yDaz2+2wWq2w2WywWCyxDoeIiIjC0Jr7d1QnzTOZTNE8HBEREVGLwk5mevToAUmSwipbUVERcUBERERErRF2MrNixYrg38vLy/Hb3/4W06ZNw9ixYwEAW7duxUcffYSHHnoo6kESERERNSWiPjPXXHMNLrnkEtx5550h21988UV8+umnePfdd6MVX5uxzwwREZH2tOb+HdFopo8++gjTp09vsH369On49NNPIzkkERERUUQiSmZSUlLw3nvvNdj+3nvvISUlpc1BEREREYUrotFMy5Ytw9y5c7Fx40aMGTMGAPDVV19h/fr1eOWVV6IaIBEREVFzIkpm5syZg
5ycHPz+97/HO++8AwDIycnBF198EUxuiIiIiDpCVCfN64zYAZiIiEh7OmTSPFVVUVBQgLKysuCikwETJ06M9LBEHUZVBfYV21HhdCPZbMDwTAtkOby5lIiIqPOIKJn58ssvccMNN+Dw4cMNVr2WJAk+ny8qwRG1ly0Fp7ByUyEKyxzw+AT0ioTstATMn5SNcYNSYx0eERG1QkSjmX7xi19g9OjR2Lt3LyoqKnD69Ongg7P/Ume3peAUlqzdg/wSO+KNOqQlGhFv1CG/pApL1u7BloJTsQ6RiIhaIaKamUOHDuEf//hHcLVsIq1QVYGVmwrhcHmRbjEFl+gwyQrSLTJK7S6s3FSIiwamsMmJiEgjIqqZGTNmDAoKCqIdC1G721dsR2GZAz3MhgZrjUmShCSzHoVlDuwrtscoQiIiaq2IambuuusuLFq0CKWlpcjNzYVerw/ZP2LEiKgERxRtFU43PD4Bg9J4Hm9UZNhUgQqnu4MjIyKiSEWUzFxzzTUAgFtvvTW4TZIkCCHYAZg6tWSzAXpFgtunwiQrDfa7fCr0soRksyEG0RERUSQiSmaKioqiHQdRhxieaUF2WgLyS6rQK1GCyyvgVVXoZBlGnYRKpwc5GYkYnsk5iYiItCKiZKZfv37RjoOoQ8iyhPmTsvH/3tqFgyccUAUgICBBgiwByQkGzJ+Uzc6/REQaElEHYAB4/fXXMX78eGRmZuLw4cMAgBUrVjS6ACVRZ+P2qvCqAj4hoArAJwS8qoDbq7b8YiIi6lQiSmZWrlyJhQsX4rLLLkNlZWWwj0xSUhJWrFgRzfiIokpVBZZ/mA9bjQeyBOgVCQZFgl7x18zYajxY/mE+VLVLr/JBRNSlRJTMvPDCC3jllVfwwAMPQFHOdKIcPXo09uzZE7XgiKJtz3EbDp5wQAKg18nQyTIU2f+nXidDAnDwhAN7jttiHSoREYUpomSmqKgIeXl5DbYbjUZUV1e3OSii9rLrSCU8PhVKXZ8YVQj4VAG1blkORZbg8anYdaQyhlESEVFrRJTMDBgwALt27Wqwff369cjJyWlrTETtRtT161WFv9+M26vC7VODfw+0Lgn2/yUi0oyIRjMtXLgQCxYsQG1tLYQQ+Prrr/Hmm29i+fLlePXVV6MdI1HU5GUlQZEkeOqyFgmAJAEQ/gRHFQJ6WUJeVlIswyQiolaIKJmZO3cu4uLi8OCDD8LpdOKGG25AZmYmfve732H27NnRjpEoaoZnWGDQy/C4/J3WRfA/Zxj0MoZncJ4ZIiKtaHUy4/V68cYbb2DatGm48cYb4XQ64XA4kJaW1h7xEUVVfmkVTDoZNW4fGhuwJEuASScjv7QKuX2sHR8gERG1Wqv7zOh0OvziF79AbW0tAMBsNjORIc2ocLrh9fmTlsbIEuBVwbWZiIg0JKIOwBdeeCF27twZ7ViI2l1SnB41Hh+EAPQyoJP9CYxO9j8XAqhx+5AUp2/5YERE1ClE1Gfml7/8JRYtWoRjx47h/PPPR3x8fMh+rppNnZkkAT7hfwTUb3LScSQTEZGmRJTMBDr53n333cFtXDWbtKCyxuMfvdQMSfKXIyIibeCq2dStWEw6uDzNr7/k8qiwmCL6X4OIiGIgom/sw4cPY9y4cdDpQl/u9XqxZcsWrqpNndb3J6uDI7HrppcJCjwXdeXy+vbo6PCIiCgCEXUAvuSSS1BRUdFgu81mwyWXXNLmoIjaS4mtBoFWprNHZtdPckpsNR0XFBERtUlEyUygb8zZysvLG3QGJupMeieZo1qOiIhir1XNTFdffTUAf2ffOXPmwGg0Bvf5fD58++23GDduXHQjJIqiacPSGtTInE3UlSMiIm1oVTJjtfpnRBVCIDExEXFxccF9BoMBF110EebNmxfdCImi6P+2hNd5/f+2FOGeKUPaORoiIoqGViUzq1atAgD0798f9957b4tNSps3b8bo0aNDanCIYunbY5VRLUdERLEXUZ+ZpUuXhtU3ZsaMG
Th+/Hgkb0HULqym8Gb2DbccERHFXkTJTLiEaKl3AlHHGpedEtVyREQUe+2azBB1NikJ4TV5hluOiIhij8kMdSunneEtUxBuOSIiij0mM9St2MNccyncckREFHvtmsw0NrEeUSyJMC/JcMsREVHssQMwdSsJBiWq5YiIKPbadWngqqqq9jw8UasdOhHeNRluOSIiir2IamZOnDiBn//858jMzIROp4OiKCEPos7qaGV4C0iGW46IiGIvopqZOXPm4MiRI3jooYeQkZHBvjGkGSZdeMl2uOWIiCj2IkpmvvjiC3z++ecYNWpUlMMhal8jelvx7q7isMoREZE2RNTMlJWVxc69pEnn90+G0kJFoiL5yxERkTZElMysWLEC999/P3744Ycoh0PUvnJ7W9E3xdxsmb4pZuSyZoaISDPCbmbq0aNHSN+Y6upqZGdnw2w2Q68PXZSvoqIiehESRVmCUQcJQGN1i1LdfiIi0o6wv7VXrFgR9Tf/7LPP8PTTT2P79u0oKSnB2rVrMXPmzOB+IQSWLl2KV155BZWVlRg/fjxWrlyJwYMHRz0W6h72FdtRXFkDSQIaaymVJKC4sgb7iu3I7cPaGSIiLQg7mbn55puj/ubV1dUYOXIkbr31Vlx99dUN9j/11FP4/e9/j7/85S8YMGAAHnroIUybNg379++HyWSKejzU9Z2qdqGyxgu1iS5fqgAqa7w4Ve3q2MCIiChiEdWnf/DBB1AUBdOmTQvZ/vHHH8Pn82HGjBlhHWfGjBlNlhVCYMWKFXjwwQdx1VVXAQBee+019OrVC++++y5mz54dSejUzVU43PA1lcnU8akCFQ53B0VERERtFVEH4Pvvvx8+n6/BdlVVcf/997c5KAAoKipCaWkppkyZEtxmtVoxZswYbN26NSrvQd2PrSa8JCXcckREFHsR1cwcOnQI55xzToPtw4YNQ0FBQZuDAoDS0lIAQK9evUK29+rVK7ivMS6XCy7XmSYCu90elXioayi1h9d8FG45IiKKvYhqZqxWK77//vsG2wsKChAfH9/moNpi+fLlsFqtwUdWVlZM46HOJcMSXl+rcMsREVHsRZTMXHXVVbjnnntQWFgY3FZQUIBFixbhyiuvjEpg6enpAPzrQNV34sSJ4L7GLF68GDabLfg4evRoVOKhrmFU3yS0tPiGVFeOiIi0IaJk5qmnnkJ8fDyGDRuGAQMGYMCAAcjJyUFKSgqeeeaZqAQ2YMAApKenY8OGDcFtdrsdX331FcaOHdvk64xGIywWS8iDKECWJMhy8+mMALD3uK1jAiIiojaLqM+M1WrFli1b8Mknn2D37t2Ii4vDiBEjMHHixFYdx+FwhPSxKSoqwq5du5CcnIy+ffvinnvuwW9/+1sMHjw4ODQ7MzMzZC4aotY4Xe0OaymOFzYcQnbPBIwblNoBURERUVu0OpnxeDyIi4vDrl27MHXqVEydOjXiN9+2bRsuueSS4POFCxcC8M9ps3r1avzmN79BdXU1br/9dlRWVuLiiy/G+vXrOccMReyUw9XkHDP1Vbu8WLmpEBcNTGmxJoeIiGKr1cmMXq9H3759Gx2a3VqTJ09u9leyJEl49NFH8eijj7b5vYgA4LvS8Ea3KTJQWObgTMBERBoQUZ+ZBx54AEuWLOEaTKQ5x0/XhlVOhQSPKlDh5HwzRESdXUR9Zl588UUUFBQgMzMT/fr1azAce8eOHVEJjijajIYw83choJclJJsN7RsQERG1WUTJDDvgklblZlrw7s7iFst5VYHstAQMz+RoOCKizi6iZGbp0qXRjoOoQ1jiwqtpMRsUzJ+Uzc6/REQaEFGfGSKtsofZB+ayERkclk1EpBER1cz4fD48//zzeOutt3DkyBG43aE3CHYMps5qT0l4o5kcrraP1iMioo4RUc3MsmXL8Nxzz+G6666DzWbDwoULcfXVV0OWZTzyyCNRDpEoelxuNarliIgo9iJKZv72t7/hlVdewaJFi6DT6XD99
dfj1VdfxcMPP4wvv/wy2jESRU3vHuFNuBhuOSIiir2IkpnS0lLk5uYCABISEmCz+dex+Z//+R/8+9//jl50RFE2ND280UnhliMiotiLKJnp06cPSkpKAADZ2dn4+OOPAQDffPMNjEZj9KIjirKeCUa0NEBJlvzliIhIGyJKZmbNmhVczfquu+7CQw89hMGDB+Omm27CrbfeGtUAiaIpJcGIBGPz/d4TjDqkMJkhItKMiEYzPfHEE8G/X3fddejbty+2bt2KwYMH44orrohacETRlpOeCLWFVbNVIZCTnthBERERUVtFlMycbezYsRg7dmw0DkXUrvaV2OHyND9SyeVRsa/EjpFZSR0TFBERtUnEk+a9/vrrGD9+PDIzM3H48GEAwIoVK/Dee+9FLTiiaNt1pBLeFmpmvEJg15HKjgmIiIjaLKJkZuXKlVi4cCEuu+wyVFZWwufzTzCWlJSEFStWRDM+oqgSQqCFXAZC+MsREZE2RJTMvPDCC3jllVfwwAMPQFGU4PbRo0djz549UQuOKNriTeG1rIZbjoiIYi+iZKaoqAh5eXkNthuNRlRXV7c5KKL2Yq8Jb22m3UdPt3MkREQULRElMwMGDMCuXbsabF+/fj1ycnLaGhNRu9l7PLy1mdbvOwFVZVMTEZEWRFSXvnDhQixYsAC1tbUQQuDrr7/Gm2++ieXLl+PVV1+NdoxEUeP0eMMqZ6/xYF+xHbl9rO0cERERtVVEyczcuXMRFxeHBx98EE6nEzfccAN69+6N3/3ud5g9e3a0YySKGpNOabkQ/B2AK5zhNUkREVFsRZTM1NTUYNasWbjxxhvhdDqxd+9ebN68GX369Il2fERR1TMxvJl99YqMZLOhnaMhIqJoiKjPzFVXXYXXXnsNAOB2u3HllVfiueeew8yZM7Fy5cqoBkgUTRnWuLDK9bKYMDyTi00SEWlBRMnMjh07MGHCBADAP/7xD/Tq1QuHDx/Ga6+9ht///vdRDZAomqpqPWGVG9U3CXJLK1ISEVGnEFEy43Q6kZjoX7vm448/xtVXXw1ZlnHRRRcFZwMm6ozKqlxhlYvTh9e3hoiIYi+iZGbQoEF49913cfToUXz00UeYOnUqAKCsrAwWC6vmqfOqcfuiWo6IiGIvomTm4Ycfxr333ov+/ftjzJgxwUUmP/7440Yn0yPqLJLj9VEtR0REsRfRaKaf/OQnuPjii1FSUoKRI0cGt1966aWYNWtW1IIjir5w+8GwvwwRkVZEvABNeno60tPTQ7ZdeOGFbQ6IqD2drKqJajkiIoq9iJqZiLSqqDy8JCXcckREFHtMZqhbMejCu+TDLUdERLHHb2zqVsZnp0a1HBERxR6TGepWLhrQI6rliIgo9pjMULfyyhc/RLUcERHFHpMZ6lYqqsObATjcckREFHtMZqhb6ZkQ3qrZ4ZYjIqLYYzJD3UqNO7yFJsMtR0REscdkhrqVwxW1US1HRESxx2SGuhVDmFd8uOWIiCj2+JVN3UpiXHgLSIZbjoiIYo/JDHUrlc7w+sKEW46IiGKPyQx1K5U13qiWIyKi2GMyQ92MiHI5IiKKNSYz1K1IwhfVckREFHtMZqhbqXZHtxwREcUekxnqVsKtb2G9DBGRdjCZoW5FDrMrTLjliIgo9pjMULfC7r9ERF0PkxnqVtjMRETU9TCZISIiIk1jMkNERESaxmSGiIiINI3JDBEREWkakxmiJqgqxzQREWkBkxmiJuw5bot1CEREFAYmM0RN2Hm0MtYhEBFRGDp9MvPII49AkqSQx7Bhw2IdFnUDEluZiIg0QRfrAMIxfPhwfPrpp8HnOp0mwiaNG9U3KdYhEBFRGDSRFeh0OqSnp8c6DOpmcntbYx0CERGFodM3MwHAoUOHkJmZiYEDB+LGG2/EkSNHmizrcrlgt9tDHkSRkGUp1iEQEVEYOn0yM2bMGKxevRrr16/HypUrUVRUhAkTJqCqqqrR8suXL4fVag0+srKyOjhiIiIi6
kiSEEJT3RwrKyvRr18/PPfcc7jtttsa7He5XHC5XMHndrsdWVlZsNlssFgsHRkqdUL97/932GV/eOLydoyEiIiaY7fbYbVaw7p/a6LPTH1JSUkYMmQICgoKGt1vNBphNBo7OCoiIiKKlU7fzHQ2h8OBwsJCZGRkxDoUIiIi6gQ6fTJz7733YtOmTfjhhx+wZcsWzJo1C4qi4Prrr491aNTFcTkDIiJt6PTNTMeOHcP111+P8vJy9OzZExdffDG+/PJL9OzZM9ahURe3r9iO3D4cnk1E1Nl1+mRmzZo1sQ6BuqmTVbUAmMwQEXV2nb6ZiShWvuVCk0REmsBkhqgJJ221sQ6BiIjC0OmbmYhiZdexypDnqiqwr9iOCqcbyWYDhmdaOEswEVEnwGSGqAn7S6rw+YGTmDC0J7YUnMLKTYUoLHPA4xPQKxKy0xIwf1I2xg1KjXWoRETdGpuZiJogAPzyjR14eVMhlqzdg/wSO+KNOqQlGhFv1CG/pApL1u7BloJTsQ6ViKhbYzJD1IwqlxfPfnwQp51upFtMMOkVyLIEk15BusUIh8uHlZsKOScNEVEMMZkhaoHbp8LtFUAj3WNMehn7jtvw3q5iJjRERDHCZIYoDG6vD7VuNfjc4fLih/JqnLDV4nSNB7/9937cvOprNjkREcUAkxmiMAgBeFV/MuNweXH8dA1qPCogAYoEmA0K+9AQEcUIkxmiMAgAiiRBCIGTVbXwCQGd7E9yTHodrGY9+9AQEcUIkxmiMCgSUF7tRqm9FjUeFbIE+FRAliT0TDRCggRJkpBk1qOwzIF9xfZYh0xE1G1wnhmiMBh1MqrdXnh9AgKAD0CcXka6NQ4JxjP/GxkVGTZVoMLpjlmsRETdDZMZojBUe/z9ZfSKv0YGAvCpAi6vD0II6GQZJoMMl0+FXpaQbDbENmAiom6EyQxRK3h8/j8lAG6fQEllLRRZAuBPaGQZGNLLgpz0xFiGSUTUrbDPDFEERL0/BQR8KlDrVeF0q/j+pAO3/OUbjmoiIuogTGaI2sin+pMaqe7h8Qnkl9g5TJuIqIMwmSFqI1kCDIoEo16GQSfDq6qwmvQcpt3JqKrAnmM2bDp4EnuO2fjvQtSFsM8MUVsJQJHrfhdIAkIFfEKEDNPO7WONbYzdHFc9J+raWDND1EYqAI9PhSoEfHW/9hVZglGR4fEJ7Dh8mrUBMbSl4BRXPSfq4lgzEyFVFdhXbEeF041kswHDMy2Q5UZWIqRuwasKeOsSFQlAcWUNZEmCy6viuU8OQK/4m6Daozag/rWYFKcHAFTWeHhdwn9uVm4qhMPlRbrFBEnynwuTrCDdIqPU7sLKTYW4aGBKtz5PRFrHZCYCrLKm5gjAv25THVutFyadjBS9IVgb8Pis3KhcK/WvxWqXDzUeHyQJMOkVxBuUbn9d7iu2o7DMgR5mQzCRCTh7xmY2BRJpF5uZWolV1hSJWq+KE/Za6GUJp6vdeOqjA/B61ZZf2Iz616IkATUeL3yqCq9PhdPlhSRJ3f66rHC64fEJGJTGv+qMigwPZ2wm0jwmM61wdpW1Sa9AliWY9AoXGaQWeVWgxF4Le60Xe45V4icvb404yah/LfZKNMJW44FPAHpFhl4nQwCw1XjQy2Lo1tdlstkAvSLB7Ws8ceSMzURdA5OZVmhNlTVRUwQAnwD2F1di0du78cWhk60+Rv1r0eUVcHlV6GT/YpcSJCiyBJfXB5dHdOvrcnimBdlpCTjt9ECI0GROCIFKpwfZaQkYnmmJUYREFA1MZlqBVdba1tlqJtw+oMRWizte347Xt/7QqvjqX4teVYUQ/o7HQgiogYcq4PH5YJAlOD0+bDpYFhxR5fWqWLvjOF78zyGs3XG8zU1enZUsS5g/KRsJRgWldhdqPD6oqkCNx4dSuwsJRgXzJ2Wz8y+RxrEDcCvUr7I2yUqD/ayy7tw6a81EtduHpev2YdXmIkwYnIaBqfEY1TcJub2tTd5k61+LOlmGJ
PkXvvQJASHOLLdQYnMBkgs+n4o/fVGEN746AqNeRlmVC7VuH1T4f9Ese38fFkzOxryJ2Y2+n5ZH740blIrHZ+UGO0rbVAG9LCEnI7Fbd44m6kqYzLRCoMo6v6QK6RY5pKkpUGWdk5HIKutOqjPXmKkC+P6UE9+f+gESAJ0ioW+yGcuuHI6LB/f0lzlrCPbAnvH4rtSBXokGKLKEWk/D2pVAXxGjTkKG1YQTdheKbbUAAEUG9JL/vSudHjzx4XcQQuD2SYNCjrGl4BT+sLEA35VWweMV0OskDEtPxC8nD9JMIjBuUCouGpii2YSMiJrHZKYVAlXWS9buQandhSSzHkZFhsunotLpYZV1J6eVGjMB//pOhSercevqb3DjRf2QlmjCR/tKUWavDU4HkJJggCIDpXYXGmuhknCmhkaWJMiShMoaT3C/qvon/AvwCeCpjw5iaC8LkhOMqHC6cbTCid//5xBOV7sR7HLiBr4qqsChsl14/tpRmkloZFni8GuiLkoSZ/eK62LsdjusVitsNhsslujUmITMM1NXZd3d5/PQAlUVGLjkg1iHEZFAemzSK0izGGFQZJx2eqDIQKJRh6JT1VDrNS8FEplALQ/gT+ZOVLlCkpzG6BUJFpMOEvzJj1cVkCX/SKnAawP9dIZnWvDegouZwBNR1LXm/s2amQiwypo6WiD5qPH4cLTCiXSrCb0SDThR5YZRpyDOoMDj8y+n4O8z4/9TJ/uXVvD6BFxNDE8OtpaKM7VCNW4VaYkGnKr2N80FEiVZkiAB0MsyPD4VB0od2HPchpFZSe17AoiImsFkJkKsstaWQG1aV6AKoLiyFqeqXDDpFfxwyoEar1pXC+OvPfEJQPUJ+LvRCEiSf7Sd/1kj6tXq6GTAo6pwuH0hRbw+FbJOhgT/EHBFkeD1qdh5tJLJDBHFFJMZ6lIaG3Xz5fflWLJ2Dxwub6zDiyq3T8DtC/1MkiqgyFKwNkYA8KgC8QZ/7U2jzspuFFmCTw0dyu4f9u1/1K/JAQCpSzdUE5EWMJkhTaufvBytcGL93lJ8f/LMmlkDe8bDVuMJztp8ytF5RzRFQ2DBSwmALPlraABAJ0s4frqm0f4y9Z/rZAmABEkSMBsU/2Rz9cqJuo44Av4mLb0iY1TfpHb9TERELWEyQ5p19iKLDrcXsgSkJZqQlmiA26di73E7quoSme4kMMtwgK225VopWfI/vKpAnF6GNU6PU9Vu1HrUMwmPBKhCwOvzb+uZaMRnB8tQUOZAdmo8bC4v+5ARUYdjMhOh2rrViQNDXiX4q9/PXuaA2kdgkUWHy4ukOD1sNR5ACKgCOFnlgkEnI8GogzVOD1utB5VON1xeX8sH7oJaGr0EIDhaye1VocgSLCY9JFlChjUORyuc8NY1Ofl8arCZSQig+HQNnv3kkP99JMCkU2A2KMhKNuPeqUOCc+QATU+85/Wq+Ne3JThe6USGNQ4De8bDXtu6pKilSf0C+09Vu1BZ7UEPsx4pCUYmXRQikskhtTyhZFfCodkRGvLgh3A3MgV8IMHxV/NLZ56f9adcl/iE/Il6z+V6iVILxwy8psHzuvKBYwFn9gdeL9V7fvYxZUkCpOZf40/ipHrvGyjXxPO6zyjLDd/rTMyB/We9pu79AeCVz79HcWUNkuIM8PhUnHK4gufAp/qn+U+zGOHxCpyo8k8S18lWM+h0ZOnMOdLJgFGnIMmsR2WNB26vCqNOqeuLo6K6rnOwIgONDZKSJcCgk7Hox0Mwb2J26HQG9ZoAE4w6fF5wCjUuX3DOGwmA2aDAGqcPa8qDxo5d/3WB/fuLbbDXeqGqAnJdwnZOpgV3TBwIa5wh5GYEoM03tZz0ROSXVgUnOVSFwO5jNkgCLc7w3F6indSdPZEjAFTWeFp9U28qIWjt9rZo6TqK1mu6kvZO5Fpz/2YyE6EhD3zY5Eq8RFqlSAiZrwZASFJs1Mlwun1QRXg1PooEnNvbisKTDnh9AgadDEWW4PaeS
YiaEqeXAUgw6CT8OKcX+qXE10um/Qnu4XInPtxbArdXhdmgQJFl+FQVTo8Kk07ChQNS8HVRBapdXnh8anD5hmDSpvhvjEa9Al1dx2mLyV9hXVXrhU/1J2yZSXG4cmQmzu1jDSbXgP/9q1welNld+PL7chyvrPF3vBbC/16SP9mr9XgR+O0jwT+Xz4DUeNx5ySCMHpDc6I8WWZIgyeH9kAlHS0lda2/A9W/k9lpvsObToEgw6XXo0yMOPzm/D/qmxDd7o2sqIZg4OBWbDp7CgdIquH0qDIqMIb0SMCQ9Ed8UVaDUXgsIf9KcnZbQaFIa7o21fk1vD7MBBkWG26fidN1kqI/Pym1wbiJ5TVfSEYkck5l62iuZGbTkg2DVOxFRLDVVKxvYrgqBWo8vZN2ukNfDXxuaHG+A2aDUq3FtvEbW6fbi+Oka+FTRIPk9W6AJ02xQ0D8lHikJhuCxKqrd2Fdsh9en+hNdSYIq/M34nrrv16aSZgn+RNOoU+D2qVDrOqRLEqBIEnrEGzC6fw/0S44PJob+19Sv9fU/3vj6CErttf7m1brzF2Cv9SIzyYTbJwyEoviTTAhg5aZCHK+sQVJc6GsE/EvbZCWb8eupQ4OvCUlW5fq18medZ9Svmfd/0rNrz6Xmngf+7ZtIhM/+N41ERyVyTGbqaa9kpqzK/6tAFQiuUizq/i6C2/xrNgnUPVf9F3rwT3FmlWNRdyxR97rAtkA5VRXB1zZ67EaO5f97w+fNvk+gvBp67OD7i8AQ3boJ2oB6nzf0WEKEftbASs71X9PUOfM1si/wp8PlxaETVcGmKUDA5VWDtQWA/8vPoEjw1A1PJiKiUIFEx9+dILTLgxSSYIV2i6iq9c8MrtQlQ4EkLLtnPE463MjJSMRfbrmwzU1OnAG4A6Qldq/RMZ2JqgrcvOrrugU/jZAkCQ5X3S9FoQLCP+1/j3gDiitroJMlmHQSqlxsFowVi0kHo06Bx6fCXutpsobgbCadDJNeQY3Hh/P69UDPBKM/yVYFyqpqsfuoDUad/9e4AIIHFfBP8lfrVaFX/HPuBITzvoHaAL0iIfCbO/BjQJYl+FQBWZL8o8ZYQ0sa5R/16P+f0RfcEj5vSF2IgCRJSDLrUVjmwL5ie4dOLMtkhjSnsQU/zXoFqYkGnKxyQYV/Jtwatw+KLKGH2YDKTrxidlcnAbDG6ZFkNqCq1j/nDySBRvrPN5CaYITJoMDp8mLJjJyQL8c9x2y44/VtiDfqYNI3nBCw0ulGcWUNks3+fhSBTvBurxrylR347airq8kL9GlRBdA7KQ6JJn/HVlUVOG6rgSL5m2NMegVVtR4cO+1PmCVJgldV4anrGwSBkH51koTgYp0GRYYkCXi8AgkmHZ67bhQuHpQKIVBXK1lXg6mGPm+spvLsWs2QPyHwTdFpPPvxAZj0Ck5W1UKRpXqfGgD8cxOlJhjg8grMmzgQub2twVpdnxr63nuLbVj1RRFqPL7gbNMtkSQgJd4QPP8/PT8LDrcX7+8uRrxRFzIRo9PtQ1W9CS5lhC6IWv/fTarXaV2uO7/xRgU6xX/+vaqAT1VxQf9kJJh0wZmugzXOQsBe48G3x2xQJAmyLEHUmw47UNanCmT3TIBJr0DU1Q4fKXdCkiXICE0B/DXP/lrpnolG6BSpwb8V0EhNdb1a+wa1+6JebXbLpzumjIoMmypQ0cHfuUxmSJPGDUrF47Nygx3QbHULfo4ZkIzp52YgK9mMCocbj72/F6edbv56jqEEow41Hh+sQkAny2fa6lv4N1EkwBKnoKzKg5yMxOAoo4DhmRZkpyXU1dDJIe3/QgjUeHxIjNPDU9d5tNarNrjxAIE1pwIj/gI3bv+NUifLwXIun1rXedg/Wg5A8PMInKmKB+pqcJrojxC4CQMSZFnAqFPQK9HUaEIWDR6vqOscLdUbJXgmNlWc6XuikwUmDe7Z7C/q3klxePOrI6j1+KAoMnzNZKWBd5ElCQlGHeINOpQ5X
Jg4tCeSzQZ8fvBkg2T0lMMVTGYC/UvUuokg6//bKbL/cwQSRkWSABlIt8QFZ7tWVYEyhwu3TRiISUN6ojGN1fQGCCFQanc1aDaJ5DXRFNoNIDTZCSQ8qipCEqDGug746haMDRzDp/o7rge6LNTv1hAoc6isCs9+dABGgxL8/wA4M+q01qtCL0tINhui/rmbw2SGNKulBT+9XhXL3t8Hb11Pe3c4PyEpqkx6GXf+aBDe/PoISu0uWON0/sSi7ld9c/8iPcwGlFX5OxTOn5Td4KbQWA2dUZHh8qmodHqQYNThxjF98bevjqCi2gNZUptManWKHOxIGegTZtIrMOnr1rMS/k6dfVPiUWavhdunwiT79xt1Mmo8KvTymQ6lqgDks9Z5CNTK+O97Al6fv8lqaHrDRC2aAknf/mJ7MKnTy/7EKzCTs/8z+JCTYWkxluGZFmQlm3Ha6QaECKlxOptAXedU+BM/l+/Mja6pZNRcb9kNf5+O0HqkgMDUDwGqEIjT62AyhCagLd1YW76OGl5/kbwmmgLHVRo9M+3r/H498OHeUuSXVCHZrGuQyFU6G//x0d7klosQdV6BBT8nDfH/mqz/5ZFfWgVF8g+3ZcVMdKXEG4I376aY9P55Zu6YlI3HZ+UiJyMRNW6fvwlAkqBT/MOgG/u+jzcoMOhk5GQkNjsyIlBDl5ORCKfLizKHC06XN/i6eRP97z0yy9rgvQKJhyIF1p4SwXWmBABLnB5C+FcqL7W7kGBUcO/UIchOS/Av8yD8fQR6JpqgSBI8qgqvT4VR5+9l4/b618k6+0tWlhBcNysl3oBfTm6/mx5w5sabaNJBkWXIkn8hUa+qwlNXq6JT/DUn4dyAZVnCvVOHwKCT4fEJKM0U959jCUa9AqNeQqXTg+y0hOCPjvmTspFgVFBqd6HG4/PXwEj+mqJ6BwDQMPGVzsqiJPibdQJ9nAI31sD7Nael66ix6y+S13QFTf271f//pD0TuaZwNBN1WZsOnsS9b+2G2aig3OHucgtNxopJL2NQzwRUu304WeWC0+0NmWwvTq9DdlpCszMAn72OlkdVYTXpMSM3HdOGp7frDMDffF+Bf+44BpfPPzeNzemBy6vCV9c01Dc5DglGHcodbnjqmi/PnoTPPyzVF/xFfrrG7e+vJfzNajpZgk/4b/ReFXC6vcFOyFLdUOUhvRKweEZOh930oj3PzCufFeLZTw4G+yA1dieR4F/vK81igsurNjpsN2S+krrznZJgwLHTNXC4vCH9RwD/L3CDTqrrD4O65EeC2aCgZ6KpQQ1JaxILzgAcvsb+3TjPTDtiMtN91e8gatTLcNZ68X25M9ZhaVKiSQchBGo9KswGHdIsxrqbhg+nHG4YFAnXnJ+FMf2Tw55RNpY3gfpfxIE+F+kWE2Zf2Bc3XNgXQPMzADf2RT6wZ3ywv1Z3mQH4i0Mn8czHB3GkvBpun0CtxwtACk5GKEkSTHoF8Qal2RtdU6vd/2FjAb4rrYLHV9cppK4fU6DPT5rFiOsv7Iv+KWa8/Nn37XpjpYY4A3AHYjLTfTXWSW/PcVuswwpKNOrQI07B6RovnHUjr87LsiLepMeBupug032mc6UiBUY9dAwZgKJIMBt0wZvRxMGp+OzQqS5x02jrF3F3/UV+tqaWNGjL8gaNHfvsBPHsY/Lfo+thMlMPk5nu7ewmgYIyR7u/Z1KcAq+Kump8GT0TDbh5bH8kmvQosdXA6fJh97FKFJ2qbjIhCHwxf1FwCh/tK0WZvTZYNs1iwrTh6RiXnQIAOF3txmmnB0nxehwur8ZftxzG0coaqEKFQZbRL8WM68f0gyoEVm/+AWVVLggIxOlk9OlhxsisJBjrRpNkWE2wxhmQEm9Aj7qhtGffjHjTIKKOwGSmHiYzVL9JoNhW227vo1f8Q8NXz7mwyV+PAa1JCFqbPDRXnokIEWkFk5l6mMwQcOYmfsWLX7TL8U06CenWuC49ioGIqCNxOQOiswSGcLeFI
gF9ks0YnpmIb4pOw17rgSxJsJp0GJze+tEgREQUHUxmiJoxpn8PjMhKQmZSHM7r2yM4AoXNNUREnQeTGaImXHd+Jp78aV6j+6JR00NERNHBGYCJmpBk5sroRERaoIlk5qWXXkL//v1hMpkwZswYfP3117EOibqB6bnpsQ6BiIjC0OmTmb///e9YuHAhli5dih07dmDkyJGYNm0aysrKYh0adXEj+yTFOgQiIgpDp09mnnvuOcybNw+33HILzjnnHPzxj3+E2WzGn//851iHRl0cO/QSEWlDp05m3G43tm/fjilTpgS3ybKMKVOmYOvWrTGMjLRq2+JLolqOiIhir1OPZjp16hR8Ph969eoVsr1Xr1747rvvGn2Ny+WCy+UKPrfb7e0aI2lLqtUMs16G09P0CkdmvYxUq7kDoyIiorbo1DUzkVi+fDmsVmvwkZWVFeuQqJPZ/9gMmPWNX/pmvYz9j83o4IiIiKgtOnUyk5qaCkVRcOLEiZDtJ06cQHp64yNNFi9eDJvNFnwcPXq0I0Iljdn/2AxsW3wJ0hKMMCoS0hKM2Lb4EiYyREQa1KmbmQwGA84//3xs2LABM2fOBACoqooNGzbgzjvvbPQ1RqMRRqOxA6MkrUq1mvH1g1NaLkhERJ1ap05mAGDhwoW4+eabMXr0aFx44YVYsWIFqqurccstt8Q6NCIiIuoEOn0yc9111+HkyZN4+OGHUVpailGjRmH9+vUNOgUTERFR9yQJIUSsg2hPrVlCnIiIiDqH1ty/O3UHYCIiIqKWMJkhIiIiTWMyQ0RERJrGZIaIiIg0jckMERERaRqTGSIiItK0Tj/PTFsFRp5zwUkiIiLtCNy3w5lBpssnM1VVVQDABSeJiIg0qKqqClartdkyXX7SPFVVUVxcjMTEREiSFOtwYs5utyMrKwtHjx7lJILg+Tgbz0dDPCeheD5C8XyEiub5EEKgqqoKmZmZkOXme8V0+ZoZWZbRp0+fWIfR6VgsFv6PVw/PRyiej4Z4TkLxfITi+QgVrfPRUo1MADsAExERkaYxmSEiIiJNYzLTzRiNRixduhRGozHWoXQKPB+heD4a4jkJxfMRiucjVKzOR5fvAExERERdG2tmiIiISNOYzBAREZGmMZkhIiIiTWMy08VUVFTgxhtvhMViQVJSEm677TY4HI5my991110YOnQo4uLi0LdvX9x9992w2Wwh5SRJavBYs2ZNe3+ciLz00kvo378/TCYTxowZg6+//rrZ8m+//TaGDRsGk8mE3NxcfPDBByH7hRB4+OGHkZGRgbi4OEyZMgWHDh1qz48QVa05H6+88gomTJiAHj16oEePHpgyZUqD8nPmzGlwLUyfPr29P0bUtOZ8rF69usFnNZlMIWW60/UxefLkRr8LLr/88mAZLV8fn332Ga644gpkZmZCkiS8++67Lb5m48aNOO+882A0GjFo0CCsXr26QZnWfid1Jq09J++88w5+/OMfo2fPnrBYLBg7diw++uijkDKPPPJIg2tk2LBhbQtUUJcyffp0MXLkSPHll1+Kzz//XAwaNEhcf/31TZbfs2ePuPrqq8W6detEQUGB2LBhgxg8eLC45pprQsoBEKtWrRIlJSXBR01NTXt/nFZbs2aNMBgM4s9//rPYt2+fmDdvnkhKShInTpxotPzmzZuFoijiqaeeEvv37xcPPvig0Ov1Ys+ePcEyTzzxhLBareLdd98Vu3fvFldeeaUYMGBAp/z8Z2vt+bjhhhvESy+9JHbu3Cny8/PFnDlzhNVqFceOHQuWufnmm8X06dNDroWKioqO+kht0trzsWrVKmGxWEI+a2lpaUiZ7nR9lJeXh5yLvXv3CkVRxKpVq4JltHx9fPDBB+KBBx4Q77zzjgAg1q5d22z577//XpjNZrFw4UKxf/9+8cILLwhFUcT69euDZVp7jjub1p6TX/3qV+LJJ58UX3/9tTh48KBYvHix0Ov1YseOHcEyS5cuFcOHDw+5Rk6eP
NmmOJnMdCH79+8XAMQ333wT3Pbhhx8KSZLE8ePHwz7OW2+9JQwGg/B4PMFt4VzEncGFF14oFixYEHzu8/lEZmamWL58eaPlr732WnH55ZeHbBszZoy44447hBBCqKoq0tPTxdNPPx3cX1lZKYxGo3jzzTfb4RNEV2vPx9m8Xq9ITEwUf/nLX4Lbbr75ZnHVVVdFO9QO0drzsWrVKmG1Wps8Xne/Pp5//nmRmJgoHA5HcJuWr4/6wvnO+81vfiOGDx8esu26664T06ZNCz5v6znuTCK9D5xzzjli2bJlwedLly4VI0eOjF5gQgg2M3UhW7duRVJSEkaPHh3cNmXKFMiyjK+++irs49hsNlgsFuh0oatdLFiwAKmpqbjwwgvx5z//OayVTDuS2+3G9u3bMWXKlOA2WZYxZcoUbN26tdHXbN26NaQ8AEybNi1YvqioCKWlpSFlrFYrxowZ0+QxO4tIzsfZnE4nPB4PkpOTQ7Zv3LgRaWlpGDp0KObPn4/y8vKoxt4eIj0fDocD/fr1Q1ZWFq666irs27cvuK+7Xx9/+tOfMHv2bMTHx4ds1+L1EYmWvj+icY61TlVVVFVVNfgOOXToEDIzMzFw4EDceOONOHLkSJveh8lMF1JaWoq0tLSQbTqdDsnJySgtLQ3rGKdOncJjjz2G22+/PWT7o48+irfeeguffPIJrrnmGvzyl7/ECy+8ELXYo+HUqVPw+Xzo1atXyPZevXo1+flLS0ubLR/4szXH7CwiOR9nu++++5CZmRnyZTx9+nS89tpr2LBhA5588kls2rQJM2bMgM/ni2r80RbJ+Rg6dCj+/Oc/47333sNf//pXqKqKcePG4dixYwC69/Xx9ddfY+/evZg7d27Idq1eH5Fo6vvDbrejpqYmKv8Pat0zzzwDh8OBa6+9NrhtzJgxWL16NdavX4+VK1eiqKgIEyZMQFVVVcTv0+UXmuwK7r//fjz55JPNlsnPz2/z+9jtdlx++eU455xz8Mgjj4Tse+ihh4J/z8vLQ3V1NZ5++mncfffdbX5f6pyeeOIJrFmzBhs3bgzp9Dp79uzg33NzczFixAhkZ2dj48aNuPTSS2MRarsZO3Ysxo4dG3w+btw45OTk4OWXX8Zjjz0Ww8hi709/+hNyc3Nx4YUXhmzvTtcHNe+NN97AsmXL8N5774X80J4xY0bw7yNGjMCYMWPQr18/vPXWW7jtttsiei/WzGjAokWLkJ+f3+xj4MCBSE9PR1lZWchrvV4vKioqkJ6e3ux7VFVVYfr06UhMTMTatWuh1+ubLT9mzBgcO3YMLperzZ8vWlJTU6EoCk6cOBGy/cSJE01+/vT09GbLB/5szTE7i0jOR8AzzzyDJ554Ah9//DFGjBjRbNmBAwciNTUVBQUFbY65PbXlfATo9Xrk5eUFP2t3vT6qq6uxZs2asG48Wrk+ItHU94fFYkFcXFxUrjmtWrNmDebOnYu33nqrQVPc2ZKSkjBkyJA2XSNMZjSgZ8+eGDZsWLMPg8GAsWPHorKyEtu3bw++9j//+Q9UVcWYMWOaPL7dbsfUqVNhMBiwbt26BkNPG7Nr1y706NGjU61HYjAYcP7552PDhg3BbaqqYsOGDSG/rusbO3ZsSHkA+OSTT4LlBwwYgPT09JAydrsdX331VZPH7CwiOR8A8NRTT+Gxxx7D+vXrQ/pfNeXYsWMoLy9HRkZGVOJuL5Gej/p8Ph/27NkT/Kzd8foA/NMZuFwu/OxnP2vxfbRyfUSipe+PaFxzWvTmm2/illtuwZtvvhkybL8pDocDhYWFbbtGotqdmGJu+vTpIi8vT3z11Vfiiy++EIMHDw4Zmn3s2DExdOhQ8dVXXwkhhLDZbGLMmDEiNzdXFBQUhAyV83q9Qggh1q1bJ1555RWxZ88ecejQIfGHP/xBmM1m8fDDD8fkMzZnzZo1wmg0itWrV4v9+/eL22+/XSQlJQWH0/785z8X999/f7D85s2bhU6nE88884zIz
88XS5cubXRodlJSknjvvffEt99+K6666ipNDb1tzfl44oknhMFgEP/4xz9CroWqqiohhBBVVVXi3nvvFVu3bhVFRUXi008/Feedd54YPHiwqK2tjclnbI3Wno9ly5aJjz76SBQWFort27eL2bNnC5PJJPbt2xcs052uj4CLL75YXHfddQ22a/36qKqqEjt37hQ7d+4UAMRzzz0ndu7cKQ4fPiyEEOL+++8XP//5z4PlA0Ozf/3rX4v8/Hzx0ksvNTo0u7lz3Nm19pz87W9/EzqdTrz00ksh3yGVlZXBMosWLRIbN24URUVFYvPmzWLKlCkiNTVVlJWVRRwnk5kupry8XFx//fUiISFBWCwWccsttwRvREIIUVRUJACI//73v0IIIf773/8KAI0+ioqKhBD+4d2jRo0SCQkJIj4+XowcOVL88Y9/FD6fLwafsGUvvPCC6Nu3rzAYDOLCCy8UX375ZXDfpEmTxM033xxS/q233hJDhgwRBoNBDB8+XPz73/8O2a+qqnjooYdEr169hNFoFJdeeqk4cOBAR3yUqGjN+ejXr1+j18LSpUuFEEI4nU4xdepU0bNnT6HX60W/fv3EvHnzNPPFLETrzsc999wTLNurVy9x2WWXhcyXIUT3uj6EEOK7774TAMTHH3/c4Fhavz6a+j4MnIObb75ZTJo0qcFrRo0aJQwGgxg4cGDInDsBzZ3jzq6152TSpEnNlhfCP3w9IyNDGAwG0bt3b3HdddeJgoKCNsXJVbOJiIhI09hnhoiIiDSNyQwRERFpGpMZIiIi0jQmM0RERKRpTGaIiIhI05jMEBERkaYxmSEiIiJNYzJDRERErfbZZ5/hiiuuQGZmJiRJwrvvvtvqYwgh8Mwzz2DIkCEwGo3o3bs3/vd//7fVx2EyQ0Rd2ubNm5Gbmwu9Xo+ZM2di48aNkCQJlZWVsQ4tqH///lixYkWswyBqlerqaowcORIvvfRSxMf41a9+hVdffRXPPPMMvvvuO6xbt67BSuzh0EUcARGRBixcuBCjRo3Chx9+iISEBJjNZpSUlMBqtcY6NCJNmzFjBmbMmNHkfpfLhQceeABvvvkmKisrce655+LJJ5/E5MmTAQD5+flYuXIl9u7di6FDhwLwL94aCdbMEFGXVlhYiB/96Efo06cPkpKSYDAYkJ6eDkmSGi3v8/mgqmoHR0nU9dx5553YunUr1qxZg2+//RY//elPMX36dBw6dAgA8K9//QsDBw7E+++/jwEDBqB///6YO3cuKioqWv1eTGaIupnJkyfj7rvvxm9+8xskJycjPT0djzzySHB/ZWUl5s6di549e8JiseBHP/oRdu/eDQCw2WxQFAXbtm0DAKiqiuTkZFx00UXB1//1r39FVlZWWLEcO3YM119/PZKTkxEfH4/Ro0fjq6++Cu5fuXIlsrOzYTAYMHToULz++ushr5ckCa+++ipmzZoFs9mMwYMHY926dQCAH374AZIkoby8HLfeeiskScLq1asbNDOtXr0aSUlJWLduHc455xwYjUYcOXIE/fv3x29/+1vcdNNNSEhIQL9+/bBu3TqcPHkSV111FRISEjBixIjguQj44osvMGHCBMTFxSErKwt33303qqurg/vLyspwxRVXIC4uDgMGDMDf/va3sM4VkZYcOXIEq1atwttvv40JEyYgOzsb9957Ly6++GKsWrUKAPD999/j8OHDePvtt/Haa69h9erV2L59O37yk5+0/g3btEwlEWnOpEmThMViEY888og4ePCg+Mtf/iIkSQqugjxlyhRxxRVXiG+++UYcPHhQLFq0SKSkpIjy8nIhhBDnnXeeePrpp4UQQuzatUskJycLg8EQXJ197ty54sYbb2wxjqqqKjFw4EAxYcIE8fnnn4tDhw6Jv//972LLli1CCCHeeecdodfrxUsvvSQOHDggnn32WaEoivjPf/4TPAYA0adPH/HGG2+IQ4cOibvvvlskJCSI8vJy4fV6RUlJibBYL
GLFihWipKREOJ3O4CrAp0+fFkIIsWrVKqHX68W4cePE5s2bxXfffSeqq6tFv379RHJysvjjH/8oDh48KObPny8sFouYPn26eOutt8SBAwfEzJkzRU5OjlBVVQghREFBgYiPjxfPP/+8OHjwoNi8ebPIy8sTc+bMCcY8Y8YMMXLkSLF161axbds2MW7cOBEXFyeef/75tv3DEsUQALF27drg8/fff18AEPHx8SEPnU4nrr32WiGEEPPmzRMAQlaZ3759uwAgvvvuu9a9f1Q+BRFpxqRJk8TFF18csu2CCy4Q9913n/j888+FxWIRtbW1Ifuzs7PFyy+/LIQQYuHCheLyyy8XQgixYsUKcd1114mRI0eKDz/8UAghxKBBg8T//d//tRjHyy+/LBITE4NJ0tnGjRsn5s2bF7Ltpz/9qbjsssuCzwGIBx98MPjc4XAIAMFYhBDCarWKVatWBZ83lswAELt27Qp5r379+omf/exnweclJSUCgHjooYeC27Zu3SoAiJKSEiGEELfddpu4/fbbQ47z+eefC1mWRU1NjThw4IAAIL7++uvg/vz8fAGAyQxp2tnJzJo1a4SiKOK7774Thw4dCnkE/n95+OGHhU6nCzmO0+kUAII/rsLFDsBE3dCIESNCnmdkZKCsrAy7d++Gw+FASkpKyP6amhoUFhYCACZNmoQ//elP8Pl82LRpE6ZOnYr09HRs3LgRI0aMQEFBQbCDX3N27dqFvLw8JCcnN7o/Pz8ft99+e8i28ePH43e/+12TnyU+Ph4WiwVlZWUtvn99BoOhwTk5+9i9evUCAOTm5jbYVlZWhvT0dOzevRvffvttSNOREAKqqqKoqAgHDx6ETqfD+eefH9w/bNgwJCUltSpeos4uLy8PPp8PZWVlmDBhQqNlxo8fD6/Xi8LCQmRnZwMADh48CADo169fq96PyQxRN6TX60OeS5IEVVXhcDiQkZGBjRs3NnhN4IY7ceJEVFVVYceOHfjss8/w+OOPIz09HU888QRGjhyJzMxMDB48uMUY4uLiovFRmvwsrREXF9doh+D6xw7sb2xb4P0cDgfuuOMO3H333Q2O1bdv3+AXNVFX4HA4UFBQEHxeVFSEXbt2ITk5GUOGDMGNN96Im266Cc8++yzy8vJw8uRJbNiwASNGjMDll1+OKVOm4LzzzsOtt96KFStWQFVVLFiwAD/+8Y8xZMiQVsXCDsBEFHTeeeehtLQUOp0OgwYNCnmkpqYC8Cc1I0aMwIsvvgi9Xo9hw4Zh4sSJ2LlzJ95//31MmjQprPcaMWIEdu3a1eTIhZycHGzevDlk2+bNm3HOOee07UO2o/POOw/79+9vcO4GDRoEg8GAYcOGwev1Yvv27cHXHDhwoFPNeUMUrm3btiEvLw95eXkA/NMg5OXl4eGHHwYArFq1CjfddBMWLVqEoUOHYubMmfjmm2/Qt29fAIAsy/jXv/6F1NRUTJw4EZdffjlycnKwZs2aVsfCmhkiCpoyZQrGjh2LmTNn4qmnnsKQIUNQXFyMf//735g1axZGjx4NwD8i6oUXXgiOOkhOTkZOTg7+/ve/hz2B1vXXX4/HH38cM2fOxPLly5GRkYGdO3ciMzMTY8eOxa9//Wtce+21yMvLw5QpU/Cvf/0L77zzDj799NN2+/xtdd999+Giiy7CnXfeiblz5yI+Ph779+/HJ598ghdffBFDhw7F9OnTcccdd2DlypXQ6XS45557olZLRdSRJk+eDH93mcbp9XosW7YMy5Yta7JMZmYm/vnPf7Y5FtbMEFGQJEn44IMPMHHiRNxyyy0YMmQIZs+ejcOHDwf7hwD+fjM+ny+kb8zkyZMbbGuOwWDAxx9/jLS0NFx22WXIzc3FE088AUVRAAAzZ87E7373OzzzzDMYPnw4Xn75ZaxatSrs48fCiBEjsGnTJhw8eBATJkwI/krNzMwMllm1ahUyMzMxadIkXH311bj99tuRlpYWw6iJtE8SzaVVRERER
J0ca2aIiIhI05jMEFG7ePzxx5GQkNDoo7n1XIiIWovNTETULioqKpocqRQXF4fevXt3cERE1FUxmSEiIiJNYzMTERERaRqTGSIiItI0JjNERESkaUxmiIiISNOYzBAREZGmMZkhIiIiTWMyQ0RERJrGZIaIiIg07f8DiIH1XWZR1q0AAAAASUVORK5CYII=", + "image/png": "", "text/plain": [ "
" ] diff --git a/noxfile.py b/noxfile.py index 1d8ab6c1fd..6b36995480 100644 --- a/noxfile.py +++ b/noxfile.py @@ -39,7 +39,7 @@ DEFAULT_PYTHON_VERSION = "3.10" -UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12"] UNIT_TEST_STANDARD_DEPENDENCIES = [ "mock", "asyncmock", @@ -54,7 +54,7 @@ UNIT_TEST_EXTRAS: List[str] = [] UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {} -SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.12"] SYSTEM_TEST_STANDARD_DEPENDENCIES = [ "jinja2", "mock", @@ -556,22 +556,11 @@ def prerelease(session: nox.sessions.Session, tests_path): "--prefer-binary", "--pre", "--upgrade", - # TODO(shobs): Remove excluding version 2.1.4 after - # https://github.com/pandas-dev/pandas/issues/56463 is resolved. - # - # TODO(shobs): Remove excluding version 2.2.0rc0 after - # https://github.com/pandas-dev/pandas/issues/56646 and - # https://github.com/pandas-dev/pandas/issues/56651 are resolved. - # - # TODO(shobs): Remove excluding version 2.2.0 after - # https://github.com/googleapis/python-bigquery-dataframes/issues/341 - # https://github.com/googleapis/python-bigquery-dataframes/issues/337 - # are resolved - # # We exclude each version individually so that we can continue to test # some prerelease packages. 
See: # https://github.com/googleapis/python-bigquery-dataframes/pull/268#discussion_r1423205172 - "pandas!=2.1.4, !=2.2.0rc0, !=2.2.0, !=2.2.1", + # "pandas!=2.1.4, !=2.2.0rc0, !=2.2.0, !=2.2.1", + "pandas", ) already_installed.add("pandas") diff --git a/owlbot.py b/owlbot.py index 77479401d5..f804859689 100644 --- a/owlbot.py +++ b/owlbot.py @@ -30,8 +30,8 @@ # Add templated files # ---------------------------------------------------------------------------- templated_files = common.py_library( - unit_test_python_versions=["3.9", "3.10", "3.11"], - system_test_python_versions=["3.9", "3.11"], + unit_test_python_versions=["3.9", "3.10", "3.11", "3.12"], + system_test_python_versions=["3.9", "3.11", "3.12"], cov_level=35, intersphinx_dependencies={ "pandas": "https://pandas.pydata.org/pandas-docs/stable/", diff --git a/setup.py b/setup.py index 5258a7d6f9..768fac530c 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ "google-cloud-storage >=2.0.0", "ibis-framework[bigquery] >=8.0.0,<9.0.0dev", # TODO: Relax upper bound once we have fixed `system_prerelease` tests. 
- "pandas >=1.5.0,<2.1.4", + "pandas >=1.5.0", "pydata-google-auth >=1.8.2", "requests >=2.27.1", "scikit-learn >=1.2.2", @@ -113,6 +113,7 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", "Topic :: Internet", ], diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/system/conftest.py b/tests/system/conftest.py index e6b241c9a3..a108ff4a8e 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -35,6 +35,7 @@ import test_utils.prefixer import bigframes +import bigframes.dataframe import tests.system.utils # Use this to control the number of cloud functions being deleted in a single @@ -357,8 +358,6 @@ def nested_pandas_df() -> pd.DataFrame: DATA_DIR / "nested.jsonl", lines=True, ) - tests.system.utils.convert_pandas_dtypes(df, bytes_col=True) - df = df.set_index("rowindex") return df diff --git a/tests/system/large/ml/test_cluster.py b/tests/system/large/ml/test_cluster.py index 9244c4b9f1..b633ca4ea2 100644 --- a/tests/system/large/ml/test_cluster.py +++ b/tests/system/large/ml/test_cluster.py @@ -19,11 +19,11 @@ from tests.system.utils import assert_pandas_df_equal -@pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_cluster_configure_fit_score_predict( session, penguins_df_default_index, dataset_id ): - model = cluster.KMeans(n_clusters=3) + model = cluster.KMeans(n_clusters=3, init="random") df = penguins_df_default_index.dropna()[ [ @@ -118,3 +118,47 @@ def test_cluster_configure_fit_score_predict( in reloaded_model._bqml_model.model_name ) assert reloaded_model.n_clusters == 3 + assert reloaded_model.init == "RANDOM" + assert reloaded_model.distance_type == "EUCLIDEAN" + assert reloaded_model.max_iter == 20 + assert reloaded_model.tol == 0.01 + + +def 
test_cluster_configure_fit_load_params(penguins_df_default_index, dataset_id): + model = cluster.KMeans( + n_clusters=4, + init="random", + distance_type="cosine", + max_iter=30, + tol=0.001, + ) + + df = penguins_df_default_index.dropna()[ + [ + "culmen_length_mm", + "culmen_depth_mm", + "flipper_length_mm", + "sex", + ] + ] + + # TODO(swast): How should we handle the default index? Currently, we get: + # "Column bigframes_index_0_z is not found in the input data to the + # EVALUATE function." + df = df.reset_index(drop=True) + + model.fit(df) + + # save, load, check n_clusters to ensure configuration was kept + reloaded_model = model.to_gbq( + f"{dataset_id}.temp_configured_cluster_model", replace=True + ) + assert ( + f"{dataset_id}.temp_configured_cluster_model" + in reloaded_model._bqml_model.model_name + ) + assert reloaded_model.n_clusters == 4 + assert reloaded_model.init == "RANDOM" + assert reloaded_model.distance_type == "COSINE" + assert reloaded_model.max_iter == 30 + assert reloaded_model.tol == 0.001 diff --git a/tests/system/large/ml/test_decomposition.py b/tests/system/large/ml/test_decomposition.py index 953287def2..264b95a92e 100644 --- a/tests/system/large/ml/test_decomposition.py +++ b/tests/system/large/ml/test_decomposition.py @@ -84,3 +84,108 @@ def test_decomposition_configure_fit_score_predict( in reloaded_model._bqml_model.model_name ) assert reloaded_model.n_components == 3 + + +def test_decomposition_configure_fit_score_predict_params( + session, penguins_df_default_index, dataset_id +): + model = decomposition.PCA(n_components=5, svd_solver="randomized") + model.fit(penguins_df_default_index) + + new_penguins = session.read_pandas( + pd.DataFrame( + { + "tag_number": [1633, 1672, 1690], + "species": [ + "Adelie Penguin (Pygoscelis adeliae)", + "Gentoo penguin (Pygoscelis papua)", + "Adelie Penguin (Pygoscelis adeliae)", + ], + "island": ["Dream", "Biscoe", "Torgersen"], + "culmen_length_mm": [37.8, 46.5, 41.1], + "culmen_depth_mm": 
[18.1, 14.8, 18.6], + "flipper_length_mm": [193.0, 217.0, 189.0], + "body_mass_g": [3750.0, 5200.0, 3325.0], + "sex": ["MALE", "FEMALE", "MALE"], + } + ).set_index("tag_number") + ) + + # Check score to ensure the model was fitted + score_result = model.score(new_penguins).to_pandas() + score_expected = pd.DataFrame( + { + "total_explained_variance_ratio": [0.932897], + }, + dtype="Float64", + ) + score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) + + pd.testing.assert_frame_equal( + score_result, score_expected, check_exact=False, rtol=0.1 + ) + + result = model.predict(new_penguins).to_pandas() + expected = pd.DataFrame( + { + "principal_component_1": [-1.459, 2.258, -1.685], + "principal_component_2": [-1.120, -1.351, -0.874], + "principal_component_3": [-0.646, 0.443, -0.704], + "principal_component_4": [-0.539, 0.234, -0.571], + "principal_component_5": [-0.876, 0.122, 0.609], + }, + dtype="Float64", + index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + ) + + tests.system.utils.assert_pandas_df_equal_pca( + result, + expected, + check_exact=False, + rtol=0.1, + ) + + # save, load, check n_components to ensure configuration was kept + reloaded_model = model.to_gbq( + f"{dataset_id}.temp_configured_pca_model", replace=True + ) + assert ( + f"{dataset_id}.temp_configured_pca_model" + in reloaded_model._bqml_model.model_name + ) + assert reloaded_model.n_components == 5 + assert reloaded_model.svd_solver == "RANDOMIZED" + + +def test_decomposition_configure_fit_load_float_component( + penguins_df_default_index, dataset_id +): + model = decomposition.PCA(n_components=0.2) + model.fit(penguins_df_default_index) + + # save, load, check n_components to ensure configuration was kept + reloaded_model = model.to_gbq( + f"{dataset_id}.temp_configured_pca_model", replace=True + ) + assert ( + f"{dataset_id}.temp_configured_pca_model" + in reloaded_model._bqml_model.model_name + ) + assert reloaded_model.n_components == 
0.2 + + +def test_decomposition_configure_fit_load_none_component( + penguins_df_default_index, dataset_id +): + model = decomposition.PCA(n_components=None) + model.fit(penguins_df_default_index) + + # save, load, check n_components. Here n_components is the column size of the training input. + reloaded_model = model.to_gbq( + f"{dataset_id}.temp_configured_pca_model", replace=True + ) + assert ( + f"{dataset_id}.temp_configured_pca_model" + in reloaded_model._bqml_model.model_name + ) + assert reloaded_model.n_components == 7 diff --git a/tests/system/large/ml/test_ensemble.py b/tests/system/large/ml/test_ensemble.py index b98d7a757c..2403644a42 100644 --- a/tests/system/large/ml/test_ensemble.py +++ b/tests/system/large/ml/test_ensemble.py @@ -20,7 +20,7 @@ import bigframes.ml.ensemble -@pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.XGBRegressor() @@ -64,7 +64,7 @@ def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): ) -@pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_xgbregressor_dart_booster_multiple_params( penguins_df_default_index, dataset_id ): @@ -75,14 +75,14 @@ def test_xgbregressor_dart_booster_multiple_params( colsample_bytree=0.95, colsample_bylevel=0.95, colsample_bynode=0.95, - num_parallel_tree=2, + n_estimators=2, max_depth=4, subsample=0.95, reg_alpha=0.0001, reg_lambda=0.0001, learning_rate=0.015, max_iterations=4, - min_rel_progress=0.02, + tol=0.02, ) df = penguins_df_default_index.dropna().sample(n=70) @@ -126,20 +126,19 @@ def test_xgbregressor_dart_booster_multiple_params( assert reloaded_model.colsample_bytree == 0.95 assert reloaded_model.colsample_bylevel == 0.95 assert reloaded_model.colsample_bynode == 0.95 - assert reloaded_model.early_stop is True assert reloaded_model.subsample == 0.95 assert reloaded_model.reg_alpha == 0.0001 assert 
reloaded_model.reg_lambda == 0.0001 assert reloaded_model.learning_rate == 0.015 assert reloaded_model.max_iterations == 4 - assert reloaded_model.min_rel_progress == 0.02 + assert reloaded_model.tol == 0.02 assert reloaded_model.gamma == 0.0 assert reloaded_model.max_depth == 4 assert reloaded_model.min_tree_child_weight == 2 - assert reloaded_model.num_parallel_tree == 2 + assert reloaded_model.n_estimators == 2 -@pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.XGBClassifier() @@ -179,7 +178,7 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): ) -# @pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_xgbclassifier_dart_booster_multiple_params( penguins_df_default_index, dataset_id ): @@ -190,14 +189,14 @@ def test_xgbclassifier_dart_booster_multiple_params( colsample_bytree=0.95, colsample_bylevel=0.95, colsample_bynode=0.95, - num_parallel_tree=2, + n_estimators=2, max_depth=4, subsample=0.95, reg_alpha=0.0001, reg_lambda=0.0001, learning_rate=0.015, max_iterations=4, - min_rel_progress=0.02, + tol=0.02, ) df = penguins_df_default_index.dropna().sample(n=70) @@ -240,20 +239,19 @@ def test_xgbclassifier_dart_booster_multiple_params( assert reloaded_model.colsample_bytree == 0.95 assert reloaded_model.colsample_bylevel == 0.95 assert reloaded_model.colsample_bynode == 0.95 - assert reloaded_model.early_stop is True assert reloaded_model.subsample == 0.95 assert reloaded_model.reg_alpha == 0.0001 assert reloaded_model.reg_lambda == 0.0001 assert reloaded_model.learning_rate == 0.015 assert reloaded_model.max_iterations == 4 - assert reloaded_model.min_rel_progress == 0.02 + assert reloaded_model.tol == 0.02 assert reloaded_model.gamma == 0.0 assert reloaded_model.max_depth == 4 assert reloaded_model.min_tree_child_weight == 2 - assert reloaded_model.num_parallel_tree == 2 + 
assert reloaded_model.n_estimators == 2 -@pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_randomforestregressor_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestRegressor() @@ -294,7 +292,7 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset ) -@pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_randomforestregressor_multiple_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestRegressor( tree_method="auto", @@ -302,12 +300,12 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase colsample_bytree=0.95, colsample_bylevel=0.95, colsample_bynode=0.95, - num_parallel_tree=90, + n_estimators=90, max_depth=14, subsample=0.95, reg_alpha=0.0001, reg_lambda=0.0001, - min_rel_progress=0.02, + tol=0.02, ) df = penguins_df_default_index.dropna().sample(n=70) @@ -349,19 +347,18 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase assert reloaded_model.colsample_bytree == 0.95 assert reloaded_model.colsample_bylevel == 0.95 assert reloaded_model.colsample_bynode == 0.95 - assert reloaded_model.early_stop is True assert reloaded_model.subsample == 0.95 assert reloaded_model.reg_alpha == 0.0001 assert reloaded_model.reg_lambda == 0.0001 - assert reloaded_model.min_rel_progress == 0.02 + assert reloaded_model.tol == 0.02 assert reloaded_model.gamma == 0.0 assert reloaded_model.max_depth == 14 assert reloaded_model.min_tree_child_weight == 2 - assert reloaded_model.num_parallel_tree == 90 + assert reloaded_model.n_estimators == 90 assert reloaded_model.enable_global_explain is False -@pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_randomforestclassifier_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestClassifier() @@ -401,7 +398,7 @@ def 
test_randomforestclassifier_default_params(penguins_df_default_index, datase ) -@pytest.mark.flaky(retries=2, delay=120) +@pytest.mark.flaky(retries=2) def test_randomforestclassifier_multiple_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestClassifier( tree_method="AUTO", @@ -409,12 +406,12 @@ def test_randomforestclassifier_multiple_params(penguins_df_default_index, datas colsample_bytree=0.95, colsample_bylevel=0.95, colsample_bynode=0.95, - num_parallel_tree=90, + n_estimators=90, max_depth=14, subsample=0.95, reg_alpha=0.0001, reg_lambda=0.0001, - min_rel_progress=0.02, + tol=0.02, ) df = penguins_df_default_index.dropna().sample(n=70) @@ -455,13 +452,12 @@ def test_randomforestclassifier_multiple_params(penguins_df_default_index, datas assert reloaded_model.colsample_bytree == 0.95 assert reloaded_model.colsample_bylevel == 0.95 assert reloaded_model.colsample_bynode == 0.95 - assert reloaded_model.early_stop is True assert reloaded_model.subsample == 0.95 assert reloaded_model.reg_alpha == 0.0001 assert reloaded_model.reg_lambda == 0.0001 - assert reloaded_model.min_rel_progress == 0.02 + assert reloaded_model.tol == 0.02 assert reloaded_model.gamma == 0.0 assert reloaded_model.max_depth == 14 assert reloaded_model.min_tree_child_weight == 2 - assert reloaded_model.num_parallel_tree == 90 + assert reloaded_model.n_estimators == 90 assert reloaded_model.enable_global_explain is False diff --git a/tests/system/large/ml/test_forecasting.py b/tests/system/large/ml/test_forecasting.py index 2bb136b0f2..b333839e2e 100644 --- a/tests/system/large/ml/test_forecasting.py +++ b/tests/system/large/ml/test_forecasting.py @@ -77,3 +77,44 @@ def test_arima_plus_model_fit_summary(time_series_df_default_index, dataset_id): assert ( f"{dataset_id}.temp_arima_plus_model" in reloaded_model._bqml_model.model_name ) + + +def test_arima_plus_model_fit_params(time_series_df_default_index, dataset_id): + model = forecasting.ARIMAPlus( + 
horizon=100, + auto_arima=True, + auto_arima_max_order=4, + auto_arima_min_order=1, + data_frequency="daily", + holiday_region="US", + clean_spikes_and_dips=False, + adjust_step_changes=False, + time_series_length_fraction=0.5, + min_time_series_length=10, + trend_smoothing_window_size=5, + decompose_time_series=False, + ) + + X_train = time_series_df_default_index[["parsed_date"]] + y_train = time_series_df_default_index[["total_visits"]] + model.fit(X_train, y_train) + + # save, load to ensure configuration was kept + reloaded_model = model.to_gbq(f"{dataset_id}.temp_arima_plus_model", replace=True) + assert ( + f"{dataset_id}.temp_arima_plus_model" in reloaded_model._bqml_model.model_name + ) + + assert reloaded_model.horizon == 100 + assert reloaded_model.auto_arima is True + assert reloaded_model.auto_arima_max_order == 4 + # TODO(garrettwu): now BQML doesn't populate auto_arima_min_order + # assert reloaded_model.auto_arima_min_order == 1 + assert reloaded_model.data_frequency == "DAILY" + assert reloaded_model.holiday_region == "US" + assert reloaded_model.clean_spikes_and_dips is False + assert reloaded_model.adjust_step_changes is False + assert reloaded_model.time_series_length_fraction == 0.5 + assert reloaded_model.min_time_series_length == 10 + assert reloaded_model.trend_smoothing_window_size == 5 + assert reloaded_model.decompose_time_series is False diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py index a0f4182e6f..99121e4a31 100644 --- a/tests/system/large/ml/test_linear_model.py +++ b/tests/system/large/ml/test_linear_model.py @@ -58,20 +58,28 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase assert reloaded_model.optimize_strategy == "NORMAL_EQUATION" assert reloaded_model.fit_intercept is True assert reloaded_model.calculate_p_values is False - assert reloaded_model.early_stop is True assert reloaded_model.enable_global_explain is False + assert 
reloaded_model.l1_reg is None assert reloaded_model.l2_reg == 0.0 - assert reloaded_model.learn_rate_strategy == "line_search" - assert reloaded_model.ls_init_learn_rate == 0.1 + assert reloaded_model.learning_rate is None + assert reloaded_model.learning_rate_strategy == "line_search" + assert reloaded_model.ls_init_learning_rate is None assert reloaded_model.max_iterations == 20 - assert reloaded_model.min_rel_progress == 0.01 + assert reloaded_model.tol == 0.01 def test_linear_regression_customized_params_fit_score( penguins_df_default_index, dataset_id ): model = bigframes.ml.linear_model.LinearRegression( - fit_intercept=False, l2_reg=0.1, min_rel_progress=0.01 + fit_intercept=False, + l2_reg=0.2, + tol=0.02, + l1_reg=0.2, + max_iterations=30, + optimize_strategy="batch_gradient_descent", + learning_rate_strategy="constant", + learning_rate=0.2, ) df = penguins_df_default_index.dropna() @@ -92,12 +100,12 @@ def test_linear_regression_customized_params_fit_score( result = model.score(X_train, y_train).to_pandas() expected = pd.DataFrame( { - "mean_absolute_error": [226.108411], - "mean_squared_error": [80459.668456], - "mean_squared_log_error": [0.00497], - "median_absolute_error": [171.618872], - "r2_score": [0.875415], - "explained_variance": [0.875417], + "mean_absolute_error": [240], + "mean_squared_error": [91197], + "mean_squared_log_error": [0.00573], + "median_absolute_error": [197], + "r2_score": [0.858], + "explained_variance": [0.8588], }, dtype="Float64", ) @@ -109,16 +117,20 @@ def test_linear_regression_customized_params_fit_score( assert ( f"{dataset_id}.temp_configured_model" in reloaded_model._bqml_model.model_name ) - assert reloaded_model.optimize_strategy == "NORMAL_EQUATION" + assert reloaded_model.optimize_strategy == "BATCH_GRADIENT_DESCENT" assert reloaded_model.fit_intercept is False assert reloaded_model.calculate_p_values is False - assert reloaded_model.early_stop is True assert reloaded_model.enable_global_explain is False - assert 
reloaded_model.l2_reg == 0.1 - assert reloaded_model.learn_rate_strategy == "line_search" - assert reloaded_model.ls_init_learn_rate == 0.1 - assert reloaded_model.max_iterations == 20 - assert reloaded_model.min_rel_progress == 0.01 + assert reloaded_model.l1_reg == 0.2 + assert reloaded_model.l2_reg == 0.2 + assert reloaded_model.ls_init_learning_rate is None + assert reloaded_model.max_iterations == 30 + assert reloaded_model.tol == 0.02 + assert reloaded_model.learning_rate_strategy == "CONSTANT" + assert reloaded_model.learning_rate == 0.2 + + +# TODO(garrettwu): add tests for param warm_start. Requires a trained model. def test_logistic_regression_configure_fit_score(penguins_df_default_index, dataset_id): @@ -163,14 +175,22 @@ def test_logistic_regression_configure_fit_score(penguins_df_default_index, data in reloaded_model._bqml_model.model_name ) assert reloaded_model.fit_intercept is True - assert reloaded_model.class_weights is None + assert reloaded_model.class_weight is None def test_logistic_regression_customized_params_fit_score( penguins_df_default_index, dataset_id ): model = bigframes.ml.linear_model.LogisticRegression( - fit_intercept=False, class_weights="balanced" + fit_intercept=False, + class_weight="balanced", + l2_reg=0.2, + tol=0.02, + l1_reg=0.2, + max_iterations=30, + optimize_strategy="batch_gradient_descent", + learning_rate_strategy="constant", + learning_rate=0.2, ) df = penguins_df_default_index.dropna() X_train = df[ @@ -189,12 +209,12 @@ def test_logistic_regression_customized_params_fit_score( result = model.score(X_train, y_train).to_pandas() expected = pd.DataFrame( { - "precision": [0.58483], - "recall": [0.586616], - "accuracy": [0.877246], - "f1_score": [0.58571], - "log_loss": [1.032699], - "roc_auc": [0.924132], + "precision": [0.487], + "recall": [0.602], + "accuracy": [0.464], + "f1_score": [0.379], + "log_loss": [0.972], + "roc_auc": [0.700], }, dtype="Float64", ) @@ -209,5 +229,16 @@ def 
test_logistic_regression_customized_params_fit_score( f"{dataset_id}.temp_configured_logistic_reg_model" in reloaded_model._bqml_model.model_name ) + # TODO(garrettwu) optimize_strategy isn't logged in BQML + # assert reloaded_model.optimize_strategy == "BATCH_GRADIENT_DESCENT" assert reloaded_model.fit_intercept is False - assert reloaded_model.class_weights == "balanced" + assert reloaded_model.class_weight == "balanced" + assert reloaded_model.calculate_p_values is False + assert reloaded_model.enable_global_explain is False + assert reloaded_model.l1_reg == 0.2 + assert reloaded_model.l2_reg == 0.2 + assert reloaded_model.ls_init_learning_rate is None + assert reloaded_model.max_iterations == 30 + assert reloaded_model.tol == 0.02 + assert reloaded_model.learning_rate_strategy == "CONSTANT" + assert reloaded_model.learning_rate == 0.2 diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index f0b138c110..cf6b2a01f8 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -118,6 +118,7 @@ def bq_cf_connection() -> str: def test_remote_function_multiply_with_ibis( session, scalars_table_id, + bigquery_client, ibis_client, dataset_id, bq_cf_connection, @@ -134,20 +135,22 @@ def test_remote_function_multiply_with_ibis( def multiply(x, y): return x * y - project_id, dataset_name, table_name = scalars_table_id.split(".") + _, dataset_name, table_name = scalars_table_id.split(".") if not ibis_client.dataset: ibis_client.dataset = dataset_name col_name = "int64_col" table = ibis_client.tables[table_name] table = table.filter(table[col_name].notnull()).order_by("rowindex").head(10) - pandas_df_orig = table.execute() + sql = table.compile() + pandas_df_orig = bigquery_client.query(sql).to_dataframe() col = table[col_name] col_2x = multiply(col, 2).name("int64_col_2x") col_square = multiply(col, col).name("int64_col_square") table = table.mutate([col_2x, col_square]) - 
pandas_df_new = table.execute() + sql = table.compile() + pandas_df_new = bigquery_client.query(sql).to_dataframe() pandas.testing.assert_series_equal( pandas_df_orig[col_name] * 2, @@ -163,7 +166,7 @@ def multiply(x, y): finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( - session.bqclient, session.cloudfunctionsclient, multiply + bigquery_client, session.cloudfunctionsclient, multiply ) @@ -171,6 +174,7 @@ def multiply(x, y): def test_remote_function_stringify_with_ibis( session, scalars_table_id, + bigquery_client, ibis_client, dataset_id, bq_cf_connection, @@ -187,19 +191,21 @@ def test_remote_function_stringify_with_ibis( def stringify(x): return f"I got {x}" - project_id, dataset_name, table_name = scalars_table_id.split(".") + _, dataset_name, table_name = scalars_table_id.split(".") if not ibis_client.dataset: ibis_client.dataset = dataset_name col_name = "int64_col" table = ibis_client.tables[table_name] table = table.filter(table[col_name].notnull()).order_by("rowindex").head(10) - pandas_df_orig = table.execute() + sql = table.compile() + pandas_df_orig = bigquery_client.query(sql).to_dataframe() col = table[col_name] col_2x = stringify(col).name("int64_str_col") table = table.mutate([col_2x]) - pandas_df_new = table.execute() + sql = table.compile() + pandas_df_new = bigquery_client.query(sql).to_dataframe() pandas.testing.assert_series_equal( pandas_df_orig[col_name].apply(lambda x: f"I got {x}"), @@ -209,7 +215,7 @@ def stringify(x): finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( - session.bqclient, session.cloudfunctionsclient, stringify + bigquery_client, session.cloudfunctionsclient, stringify ) @@ -1041,7 +1047,7 @@ def test_remote_function_via_session_context_connection_setter( # unique dataset_id, even though the cloud function would be reused, the bq # remote function would still be created, making use of the bq connection # set in the 
BigQueryOptions above. - @session.remote_function([int], int, dataset=dataset_id) + @session.remote_function([int], int, dataset=dataset_id, reuse=False) def square(x): return x * x @@ -1078,7 +1084,7 @@ def square(x): def test_remote_function_default_connection(session, scalars_dfs, dataset_id): try: - @session.remote_function([int], int, dataset=dataset_id) + @session.remote_function([int], int, dataset=dataset_id, reuse=False) def square(x): return x * x @@ -1115,7 +1121,7 @@ def square(x): def test_remote_function_runtime_error(session, scalars_dfs, dataset_id): try: - @session.remote_function([int], int, dataset=dataset_id) + @session.remote_function([int], int, dataset=dataset_id, reuse=False) def square(x): return x * x diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 854672585d..b952289a72 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -266,3 +266,40 @@ def test_dt_strftime_time(): bf_result, expected_result, check_index_type=False, check_dtype=False ) assert bf_result.dtype == "string[pyarrow]" + + +@pytest.mark.parametrize( + ("col_name",), + DATETIME_COL_NAMES, +) +@skip_legacy_pandas +def test_dt_normalize(scalars_dfs, col_name): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[col_name].dt.normalize().to_pandas() + pd_result = scalars_pandas_df[col_name].dt.normalize() + + assert_series_equal( + pd_result.astype(scalars_df[col_name].dtype), # normalize preserves type + bf_result, + ) + + +@pytest.mark.parametrize( + ("col_name", "freq"), + [ + ("timestamp_col", "D"), + ("timestamp_col", "min"), + ("datetime_col", "s"), + ("datetime_col", "us"), + ], +) +@skip_legacy_pandas +def test_dt_floor(scalars_dfs, col_name, freq): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[col_name].dt.floor(freq).to_pandas() + pd_result = scalars_pandas_df[col_name].dt.floor(freq) + + 
assert_series_equal( + pd_result.astype(scalars_df[col_name].dtype), # floor preserves type + bf_result, + ) diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index 876c8f7d04..41ea7d4ebb 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -13,9 +13,11 @@ # limitations under the License. import numpy as np +import pandas as pd import pandas._testing as tm import pytest +import bigframes.operations._matplotlib.core as bf_mpl import bigframes.pandas as bpd @@ -207,12 +209,42 @@ def test_scatter(scalars_dfs): ) +@pytest.mark.parametrize( + ("c"), + [ + pytest.param("red", id="red"), + pytest.param("c", id="int_column"), + pytest.param("species", id="color_column"), + pytest.param(3, id="column_index"), + ], +) +def test_scatter_args_c(c): + data = { + "a": [1, 2, 3], + "b": [1, 2, 3], + "c": [1, 2, 3], + "species": ["r", "g", "b"], + } + df = bpd.DataFrame(data) + pd_df = pd.DataFrame(data) + + ax = df.plot.scatter(x="a", y="b", c=c) + pd_ax = pd_df.plot.scatter(x="a", y="b", c=c) + assert len(ax.collections[0].get_facecolor()) == len( + pd_ax.collections[0].get_facecolor() + ) + for idx in range(len(ax.collections[0].get_facecolor())): + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[idx], + pd_ax.collections[0].get_facecolor()[idx], + ) + + def test_sampling_plot_args_n(): - df = bpd.DataFrame(np.arange(1000), columns=["one"]) + df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLING_N * 10), columns=["one"]) ax = df.plot.line() assert len(ax.lines) == 1 - # Default sampling_n is 100 - assert len(ax.lines[0].get_data()[1]) == 100 + assert len(ax.lines[0].get_data()[1]) == bf_mpl.DEFAULT_SAMPLING_N ax = df.plot.line(sampling_n=2) assert len(ax.lines) == 1 @@ -220,7 +252,7 @@ def test_sampling_plot_args_n(): def test_sampling_plot_args_random_state(): - df = bpd.DataFrame(np.arange(1000), columns=["one"]) + df = 
bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLING_N * 10), columns=["one"]) ax_0 = df.plot.line() ax_1 = df.plot.line() ax_2 = df.plot.line(sampling_random_state=100) @@ -235,6 +267,18 @@ def test_sampling_plot_args_random_state(): tm.assert_almost_equal(ax_0.lines[0].get_data()[1], ax_2.lines[0].get_data()[1]) +def test_sampling_preserve_ordering(): + df = bpd.DataFrame([0.0, 1.0, 2.0, 3.0, 4.0], index=[1, 3, 4, 2, 0]) + pd_df = pd.DataFrame([0.0, 1.0, 2.0, 3.0, 4.0], index=[1, 3, 4, 2, 0]) + ax = df.plot.line() + pd_ax = pd_df.plot.line() + tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks()) + tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks()) + for line, pd_line in zip(ax.lines, pd_ax.lines): + # Compare y coordinates between the lines + tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1]) + + @pytest.mark.parametrize( ("kind", "col_names", "kwargs"), [ @@ -251,7 +295,7 @@ def test_sampling_plot_args_random_state(): marks=pytest.mark.xfail(raises=ValueError), ), pytest.param( - "uknown", + "bar", ["int64_col", "int64_too"], {}, marks=pytest.mark.xfail(raises=NotImplementedError), diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index 79f92c94b4..9654c77ec4 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -181,6 +181,26 @@ def test_len(scalars_dfs): ) +def test_len_with_array_column(nested_df, nested_pandas_df): + """ + Series.str.len() is expected to work on columns containing lists as well as strings. + + See: https://stackoverflow.com/a/41340543/101923 + """ + col_name = "event_sequence" + bf_series: bigframes.series.Series = nested_df[col_name] + bf_result = bf_series.str.len().to_pandas() + pd_result = nested_pandas_df[col_name].str.len() + + # One of dtype mismatches to be documented. 
Here, the `bf_result.dtype` is `Int64` but + # the `pd_result.dtype` is `float64`: https://github.com/pandas-dev/pandas/issues/51948 + assert_series_equal( + pd_result.astype(pd.Int64Dtype()), + bf_result, + check_index_type=False, + ) + + def test_lower(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index ee32fb25ac..99ee6680fa 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -14,6 +14,7 @@ import io import operator +import sys import tempfile import typing from typing import Tuple @@ -43,8 +44,43 @@ def test_df_construct_copy(scalars_dfs): pandas.testing.assert_frame_equal(bf_result, pd_result) -def test_df_construct_pandas(scalars_dfs): - columns = ["int64_too", "int64_col", "float64_col", "bool_col", "string_col"] +def test_df_construct_pandas_default(scalars_dfs): + # This should trigger the inlined codepath + columns = [ + "int64_too", + "int64_col", + "float64_col", + "bool_col", + "string_col", + "date_col", + "datetime_col", + "numeric_col", + "float64_col", + "time_col", + "timestamp_col", + ] + _, scalars_pandas_df = scalars_dfs + bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns).to_pandas() + pd_result = pd.DataFrame(scalars_pandas_df, columns=columns) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_construct_pandas_load_job(scalars_dfs): + # This should trigger the load job codepath + columns = [ + "int64_too", + "int64_col", + "float64_col", + "bool_col", + "string_col", + "date_col", + "datetime_col", + "numeric_col", + "float64_col", + "time_col", + "timestamp_col", + "geography_col", + ] + _, scalars_pandas_df = scalars_dfs + bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns).to_pandas() + pd_result = pd.DataFrame(scalars_pandas_df, columns=columns) + pandas.testing.assert_frame_equal(bf_result, pd_result) @@ -1056,10 +1092,13 @@ def test_df_iter( assert bf_i == 
df_i +@skip_legacy_pandas def test_iterrows( scalars_df_index, scalars_pandas_df_index, ): + scalars_df_index = scalars_df_index.add_suffix("_suffix", axis=1) + scalars_pandas_df_index = scalars_pandas_df_index.add_suffix("_suffix", axis=1) for (bf_index, bf_series), (pd_index, pd_series) in zip( scalars_df_index.iterrows(), scalars_pandas_df_index.iterrows() ): @@ -1877,6 +1916,34 @@ def test_corr_w_invalid_parameters(scalars_dfs): scalars_df[columns].corr(min_periods=1) +@pytest.mark.parametrize( + ("columns", "numeric_only"), + [ + (["bool_col", "int64_col", "float64_col"], True), + (["bool_col", "int64_col", "float64_col"], False), + (["bool_col", "int64_col", "float64_col", "string_col"], True), + pytest.param( + ["bool_col", "int64_col", "float64_col", "string_col"], + False, + marks=pytest.mark.xfail( + raises=NotImplementedError, + ), + ), + ], +) +def test_cov_w_numeric_only(scalars_dfs, columns, numeric_only): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[columns].cov(numeric_only=numeric_only).to_pandas() + pd_result = scalars_pandas_df[columns].cov(numeric_only=numeric_only) + + # BigFrames and Pandas differ in their data type handling: + # - Column types: BigFrames uses Float64, Pandas uses float64. + # - Index types: BigFrames uses string, Pandas uses object. 
+ pd.testing.assert_frame_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + @pytest.mark.parametrize( ("op"), [ @@ -1936,7 +2003,7 @@ def test_mod(scalars_dfs, other_scalar): def test_scalar_binop_str_exception(scalars_dfs): scalars_df, _ = scalars_dfs columns = ["string_col"] - with pytest.raises(TypeError): + with pytest.raises(Exception): (scalars_df[columns] + 1).to_pandas() @@ -2461,6 +2528,8 @@ def test_df_pivot(scalars_dfs, values, index, columns): pd_result = scalars_pandas_df.pivot(values=values, index=index, columns=columns) # Pandas produces NaN, where bq dataframes produces pd.NA + bf_result = bf_result.fillna(float("nan")) + pd_result = pd_result.fillna(float("nan")) pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) @@ -2760,7 +2829,7 @@ def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): bf_df.loc[bf_df["int64_too"] == 0, "new_col"] = 99 pd_df.loc[pd_df["int64_too"] == 0, "new_col"] = 99 - # pandas type difference + # pandas uses float64 instead pd_df["new_col"] = pd_df["new_col"].astype("Float64") pd.testing.assert_frame_equal( @@ -2785,7 +2854,7 @@ def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs): ) -def test_loc_setitem_bool_series_scalar_type_error(scalars_dfs): +def test_loc_setitem_bool_series_scalar_error(scalars_dfs): if pd.__version__.startswith("1."): pytest.skip("this loc overload not supported in pandas 1.x.") @@ -2793,9 +2862,9 @@ def test_loc_setitem_bool_series_scalar_type_error(scalars_dfs): bf_df = scalars_df.copy() pd_df = scalars_pandas_df.copy() - with pytest.raises(TypeError): + with pytest.raises(Exception): bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = 99 - with pytest.raises(TypeError): + with pytest.raises(Exception): pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = 99 @@ -3011,6 +3080,28 @@ def test_sample_raises_value_error(scalars_dfs): scalars_df.sample(frac=0.5, n=4) +def test_sample_args_sort(scalars_dfs): + scalars_df, _ = scalars_dfs 
+ index = [4, 3, 2, 5, 1, 0] + scalars_df = scalars_df.iloc[index] + + kwargs = {"frac": 1.0, "random_state": 333} + + df = scalars_df.sample(**kwargs).to_pandas() + assert df.index.values != index + assert df.index.values != sorted(index) + + df = scalars_df.sample(sort="random", **kwargs).to_pandas() + assert df.index.values != index + assert df.index.values != sorted(index) + + df = scalars_df.sample(sort=True, **kwargs).to_pandas() + assert df.index.values == sorted(index) + + df = scalars_df.sample(sort=False, **kwargs).to_pandas() + assert df.index.values == index + + @pytest.mark.parametrize( ("axis",), [ @@ -3943,6 +4034,13 @@ def test_df_dot_operator_series( ) +# TODO(tswast): We may be able to re-enable this test after we break large +# queries up in https://github.com/googleapis/python-bigquery-dataframes/pull/427 +@pytest.mark.skipif( + sys.version_info >= (3, 12), + # See: https://github.com/python/cpython/issues/112282 + reason="setrecursionlimit has no effect on the Python C stack since Python 3.12.", +) def test_recursion_limit(scalars_df_index): scalars_df_index = scalars_df_index[["int64_too", "int64_col", "float64_col"]] for i in range(400): @@ -3958,7 +4056,7 @@ def test_to_pandas_downsampling_option_override(session): total_memory_bytes = df.memory_usage(deep=True).sum() total_memory_mb = total_memory_bytes / (1024 * 1024) - assert total_memory_mb == pytest.approx(download_size, rel=0.3) + assert total_memory_mb == pytest.approx(download_size, rel=0.5) def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_created): diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index adc729565e..10d7408790 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -19,7 +19,7 @@ import pyarrow as pa import pytest -from tests.system.utils import assert_pandas_df_equal, convert_pandas_dtypes +from 
tests.system import utils try: import pandas_gbq # type: ignore @@ -115,7 +115,6 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index): pd.testing.assert_series_equal(actual, expected) -@pytest.mark.skip(reason="Disable to unblock kokoro tests") @pytest.mark.parametrize( ("index"), [True, False], @@ -150,12 +149,12 @@ def test_to_csv_index( # read_csv will decode into bytes inproperly, convert_pandas_dtypes will encode properly from string dtype.pop("bytes_col") gcs_df = pd.read_csv( - path, + utils.get_first_file_from_wildcard(path), dtype=dtype, date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"}, index_col=index_col, ) - convert_pandas_dtypes(gcs_df, bytes_col=True) + utils.convert_pandas_dtypes(gcs_df, bytes_col=True) gcs_df.index.name = scalars_df.index.name scalars_pandas_df = scalars_pandas_df.copy() @@ -164,7 +163,6 @@ def test_to_csv_index( pd.testing.assert_frame_equal(gcs_df, scalars_pandas_df) -@pytest.mark.skip(reason="Disable to unblock kokoro tests") def test_to_csv_tabs( scalars_dfs: Tuple[bigframes.dataframe.DataFrame, pd.DataFrame], gcs_folder: str, @@ -189,13 +187,13 @@ def test_to_csv_tabs( # read_csv will decode into bytes inproperly, convert_pandas_dtypes will encode properly from string dtype.pop("bytes_col") gcs_df = pd.read_csv( - path, + utils.get_first_file_from_wildcard(path), sep="\t", dtype=dtype, date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"}, index_col=index_col, ) - convert_pandas_dtypes(gcs_df, bytes_col=True) + utils.convert_pandas_dtypes(gcs_df, bytes_col=True) gcs_df.index.name = scalars_df.index.name scalars_pandas_df = scalars_pandas_df.copy() @@ -229,7 +227,7 @@ def test_to_gbq_index(scalars_dfs, dataset_id, index): else: df_out = df_out.sort_values("rowindex_2").reset_index(drop=True) - convert_pandas_dtypes(df_out, bytes_col=False) + utils.convert_pandas_dtypes(df_out, bytes_col=False) # pd.read_gbq interpets bytes_col as object, reconvert to pyarrow binary df_out["bytes_col"] = 
df_out["bytes_col"].astype(pd.ArrowDtype(pa.binary())) expected = scalars_pandas_df.copy() @@ -415,7 +413,6 @@ def test_to_json_index_invalid_lines( scalars_df.to_json(path, index=index) -@pytest.mark.skip(reason="Disable to unblock kokoro tests") @pytest.mark.parametrize( ("index"), [True, False], @@ -435,8 +432,12 @@ def test_to_json_index_records_orient( """ Test the `to_json` API with `orient` is `records` and `lines` is True""" scalars_df.to_json(path, index=index, orient="records", lines=True) - gcs_df = pd.read_json(path, lines=True, convert_dates=["datetime_col"]) - convert_pandas_dtypes(gcs_df, bytes_col=True) + gcs_df = pd.read_json( + utils.get_first_file_from_wildcard(path), + lines=True, + convert_dates=["datetime_col"], + ) + utils.convert_pandas_dtypes(gcs_df, bytes_col=True) if index and scalars_df.index.name is not None: gcs_df = gcs_df.set_index(scalars_df.index.name) @@ -474,8 +475,8 @@ def test_to_parquet_index(scalars_dfs, gcs_folder, index): # table. scalars_df.to_parquet(path, index=index) - gcs_df = pd.read_parquet(path.replace("*", "000000000000")) - convert_pandas_dtypes(gcs_df, bytes_col=False) + gcs_df = pd.read_parquet(utils.get_first_file_from_wildcard(path)) + utils.convert_pandas_dtypes(gcs_df, bytes_col=False) if index and scalars_df.index.name is not None: gcs_df = gcs_df.set_index(scalars_df.index.name) @@ -507,7 +508,7 @@ def test_to_sql_query_unnamed_index_included( pd_df = scalars_pandas_df_default_index.reset_index(drop=True) roundtrip = session.read_gbq(sql, index_col=idx_ids) roundtrip.index.names = [None] - assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False) + utils.assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False) def test_to_sql_query_named_index_included( @@ -524,7 +525,7 @@ def test_to_sql_query_named_index_included( pd_df = scalars_pandas_df_default_index.set_index("rowindex_2", drop=True) roundtrip = session.read_gbq(sql, index_col=idx_ids) - 
assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) + utils.assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) def test_to_sql_query_unnamed_index_excluded( @@ -539,7 +540,7 @@ def test_to_sql_query_unnamed_index_excluded( pd_df = scalars_pandas_df_default_index.reset_index(drop=True) roundtrip = session.read_gbq(sql) - assert_pandas_df_equal( + utils.assert_pandas_df_equal( roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True ) @@ -558,6 +559,6 @@ def test_to_sql_query_named_index_excluded( "rowindex_2", drop=True ).reset_index(drop=True) roundtrip = session.read_gbq(sql) - assert_pandas_df_equal( + utils.assert_pandas_df_equal( roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True ) diff --git a/tests/system/small/test_encryption.py b/tests/system/small/test_encryption.py index f13d2b9e1a..3389e5cd68 100644 --- a/tests/system/small/test_encryption.py +++ b/tests/system/small/test_encryption.py @@ -19,6 +19,7 @@ import bigframes import bigframes.ml.linear_model +from tests.system import utils @pytest.fixture(scope="module") @@ -145,9 +146,6 @@ def test_df_apis(bq_cmek, session_with_bq_cmek, scalars_table_id): pytest.param( None, id="default_engine", - marks=pytest.mark.skip( - reason="Internal issue 327544164, cmek does not propagate to the dataframe." - ), ), ], ) @@ -160,7 +158,7 @@ def test_read_csv_gcs( # Create a csv in gcs write_path = gcs_folder + "test_read_csv_gcs_bigquery_engine*.csv" read_path = ( - write_path.replace("*", "000000000000") if engine is None else write_path + utils.get_first_file_from_wildcard(write_path) if engine is None else write_path ) scalars_df_index.to_csv(write_path) @@ -206,9 +204,6 @@ def test_to_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id): assert output_table_dataset.default_encryption_configuration is None -@pytest.mark.skip( - reason="Internal issue 327544164, cmek does not propagate to the dataframe." 
-) def test_read_pandas(bq_cmek, session_with_bq_cmek): if not bq_cmek: pytest.skip("no cmek set for testing") diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index b38dcaf5d1..e7ecbedfc2 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -228,7 +228,9 @@ def test_dataframe_groupby_multi_sum( (lambda x: x.cumsum(numeric_only=True)), (lambda x: x.cummax(numeric_only=True)), (lambda x: x.cummin(numeric_only=True)), - (lambda x: x.cumprod()), + # pandas 2.2 uses floating point for cumulative product even for + # integer inputs. + (lambda x: x.cumprod().astype("Float64")), (lambda x: x.shift(periods=2)), ], ids=[ diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index 4a293526df..d585d4f73e 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ -45,8 +45,9 @@ def test_read_pandas_multi_index_axes(): [[1, 2], [3, 4]], index=index, columns=columns, dtype=pandas.Int64Dtype() ) bf_df = bpd.DataFrame(pandas_df) + bf_df_computed = bf_df.to_pandas() - pandas.testing.assert_frame_equal(bf_df.to_pandas(), pandas_df) + pandas.testing.assert_frame_equal(bf_df_computed, pandas_df) # Row Multi-index tests @@ -920,6 +921,27 @@ def test_corr_w_multi_index(scalars_df_index, scalars_pandas_df_index): ) +def test_cov_w_multi_index(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_too", "float64_col", "int64_col"] + multi_columns = pandas.MultiIndex.from_tuples(zip(["a", "b", "b"], [1, 2, 2])) + + bf = scalars_df_index[columns].copy() + bf.columns = multi_columns + + pd_df = scalars_pandas_df_index[columns].copy() + pd_df.columns = multi_columns + + bf_result = bf.cov(numeric_only=True).to_pandas() + pd_result = pd_df.cov(numeric_only=True) + + # BigFrames and Pandas differ in their data type handling: + # - Column types: BigFrames uses Float64, Pandas uses float64. 
+ # - Index types: BigFrames uses string, Pandas uses object. + pandas.testing.assert_frame_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + @pytest.mark.parametrize( ("index_names",), [ diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index ec61329aa5..a080a969c8 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -539,3 +539,46 @@ def test_to_datetime_series(scalars_dfs): pd.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) + + +@pytest.mark.parametrize( + ("arg", "unit"), + [ + ([1, 2, 3], "W"), + ([1, 2, 3], "d"), + ([1, 2, 3], "D"), + ([1, 2, 3], "h"), + ([1, 2, 3], "m"), + ([20242330, 25244685, 34324234], "s"), + ([20242330000, 25244685000, 34324234000], "ms"), + ([20242330000000, 25244685000000, 34324234000000], "us"), + ([20242330000000000, 25244685000000000, 34324234000000000], "ns"), + ], +) +def test_to_datetime_unit_param(arg, unit): + bf_result = bpd.to_datetime(arg, unit=unit).to_pandas().astype("datetime64[ns]") + pd_result = pd.Series(pd.to_datetime(arg, unit=unit)).dt.floor("us") + pd.testing.assert_series_equal( + bf_result, pd_result, check_index_type=False, check_names=False + ) + + +@pytest.mark.parametrize( + ("arg", "utc", "format"), + [ + ([20230110, 20230101, 20230101], False, "%Y%m%d"), + ([201301.01], False, "%Y%m.%d"), + (["2023-01-10", "2023-01-20", "2023-01-01"], True, "%Y-%m-%d"), + (["2014-08-15 07:19"], True, "%Y-%m-%d %H:%M"), + ], +) +def test_to_datetime_format_param(arg, utc, format): + bf_result = ( + bpd.to_datetime(arg, utc=utc, format=format) + .to_pandas() + .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]") + ) + pd_result = pd.Series(pd.to_datetime(arg, utc=utc, format=format)).dt.floor("us") + pd.testing.assert_series_equal( + bf_result, pd_result, check_index_type=False, check_names=False + ) diff --git a/tests/system/small/test_remote_function.py 
b/tests/system/small/test_remote_function.py index 1cf494ea6b..e7e434dbd0 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import google.api_core.exceptions from google.cloud import bigquery import pandas as pd import pytest @@ -466,6 +467,39 @@ def add_one(x): ) +def test_skip_bq_connection_check(dataset_id_permanent): + connection_name = "connection_does_not_exist" + session = bigframes.Session( + context=bigframes.BigQueryOptions( + bq_connection=connection_name, skip_bq_connection_check=True + ) + ) + + # Make sure that the connection does not exist + with pytest.raises(google.api_core.exceptions.NotFound): + session.bqconnectionclient.get_connection( + name=session.bqconnectionclient.connection_path( + session._project, session._location, connection_name + ) + ) + + # Make sure that an attempt to create a remote function routine with + # non-existent connection would result in an exception thrown by the BQ + # service. 
+ # This is different from the exception throw by the BQ Connection service + # if it was not able to create the connection because of lack of permission + # when skip_bq_connection_check was not set to True: + # google.api_core.exceptions.PermissionDenied: 403 Permission 'resourcemanager.projects.setIamPolicy' denied on resource + with pytest.raises( + google.api_core.exceptions.NotFound, + match=f"Not found: Connection {connection_name}", + ): + + @session.remote_function([int], int, dataset=dataset_id_permanent) + def add_one(x): + return x + 1 + + @pytest.mark.flaky(retries=2, delay=120) def test_read_gbq_function_detects_invalid_function(bigquery_client, dataset_id): dataset_ref = bigquery.DatasetReference.from_string(dataset_id) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index f63ea977ff..8847753e88 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import datetime as dt import math import re import tempfile @@ -27,6 +28,7 @@ from tests.system.utils import ( assert_pandas_df_equal, assert_series_equal, + get_first_file_from_wildcard, skip_legacy_pandas, ) @@ -42,6 +44,32 @@ def test_series_construct_copy(scalars_dfs): pd.testing.assert_series_equal(bf_result, pd_result) +def test_series_construct_nullable_ints(): + bf_result = series.Series( + [1, 3, bigframes.pandas.NA], index=[0, 4, bigframes.pandas.NA] + ).to_pandas() + + expected_index = pd.Index( + [0, 4, None], + dtype=pd.Int64Dtype(), + ) + expected = pd.Series([1, 3, pd.NA], dtype=pd.Int64Dtype(), index=expected_index) + + pd.testing.assert_series_equal(bf_result, expected) + + +def test_series_construct_timestamps(): + datetimes = [ + dt.datetime(2020, 1, 20, 20, 20, 20, 20), + dt.datetime(2019, 1, 20, 20, 20, 20, 20), + None, + ] + bf_result = series.Series(datetimes).to_pandas() + pd_result = pd.Series(datetimes, dtype=pd.ArrowDtype(pa.timestamp("us"))) + + pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + + def test_series_construct_copy_with_index(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = series.Series( @@ -212,10 +240,18 @@ def test_series___getitem__(scalars_dfs, index_col, key): pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) -def test_series___getitem___with_int_key(scalars_dfs): +@pytest.mark.parametrize( + ("key",), + ( + (-2,), + (-1,), + (0,), + (1,), + ), +) +def test_series___getitem___with_int_key(scalars_dfs, key): col_name = "int64_too" index_col = "string_col" - key = 2 scalars_df, scalars_pandas_df = scalars_dfs scalars_df = scalars_df.set_index(index_col, drop=False) scalars_pandas_df = scalars_pandas_df.set_index(index_col, drop=False) @@ -1461,7 +1497,8 @@ def test_groupby_prod(scalars_dfs): (lambda x: x.cumcount()), (lambda x: x.cummin()), (lambda x: x.cummax()), - (lambda x: x.cumprod()), + # Pandas 2.2 casts to cumprod to float. 
+ (lambda x: x.cumprod().astype("Float64")), (lambda x: x.diff()), (lambda x: x.shift(2)), (lambda x: x.shift(-2)), @@ -1485,7 +1522,7 @@ def test_groupby_window_ops(scalars_df_index, scalars_pandas_df_index, operator) ).to_pandas() pd_series = operator( scalars_pandas_df_index[col_name].groupby(scalars_pandas_df_index[group_key]) - ).astype(pd.Int64Dtype()) + ).astype(bf_series.dtype) pd.testing.assert_series_equal( pd_series, bf_series, @@ -1631,6 +1668,21 @@ def test_empty_true_memtable(session: bigframes.Session): assert bf_result == pd_result +def test_series_names(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = scalars_df["string_col"].copy() + bf_result.index.name = "new index name" + bf_result.name = "new series name" + + pd_result = scalars_pandas_df["string_col"].copy() + pd_result.index.name = "new index name" + pd_result.name = "new series name" + + assert pd_result.name == bf_result.name + assert pd_result.index.name == bf_result.index.name + + def test_dtype(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs @@ -2390,11 +2442,10 @@ def test_to_frame(scalars_dfs): assert_pandas_df_equal(bf_result, pd_result) -@pytest.mark.skip(reason="Disable to unblock kokoro tests") def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index): path = gcs_folder + "test_series_to_json*.jsonl" scalars_df_index["int64_col"].to_json(path, lines=True, orient="records") - gcs_df = pd.read_json(path, lines=True) + gcs_df = pd.read_json(get_first_file_from_wildcard(path), lines=True) pd.testing.assert_series_equal( gcs_df["int64_col"].astype(pd.Int64Dtype()), @@ -2404,11 +2455,10 @@ def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index): ) -@pytest.mark.skip(reason="Disable to unblock kokoro tests") def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index): path = gcs_folder + "test_series_to_csv*.csv" scalars_df_index["int64_col"].to_csv(path) - gcs_df = pd.read_csv(path) + gcs_df = 
pd.read_csv(get_first_file_from_wildcard(path)) pd.testing.assert_series_equal( gcs_df["int64_col"].astype(pd.Int64Dtype()), @@ -2660,7 +2710,14 @@ def foo(x): ("timestamp_col", "time64[us][pyarrow]"), ("timestamp_col", pd.ArrowDtype(pa.timestamp("us"))), ("datetime_col", "date32[day][pyarrow]"), - ("datetime_col", "string[pyarrow]"), + pytest.param( + "datetime_col", + "string[pyarrow]", + marks=pytest.mark.skipif( + pd.__version__.startswith("2.2"), + reason="pandas 2.2 uses T as date/time separator whereas earlier versions use space", + ), + ), ("datetime_col", "time64[us][pyarrow]"), ("datetime_col", pd.ArrowDtype(pa.timestamp("us", tz="UTC"))), ("date_col", "string[pyarrow]"), @@ -3100,8 +3157,8 @@ def test_query_job_setters(scalars_dfs): ], ) def test_is_monotonic_increasing(series_input): - scalars_df = series.Series(series_input) - scalars_pandas_df = pd.Series(series_input) + scalars_df = series.Series(series_input, dtype=pd.Int64Dtype()) + scalars_pandas_df = pd.Series(series_input, dtype=pd.Int64Dtype()) assert ( scalars_df.is_monotonic_increasing == scalars_pandas_df.is_monotonic_increasing ) @@ -3245,7 +3302,10 @@ def test_apply_lambda(scalars_dfs, col, lambda_): bf_result = bf_col.apply(lambda_, by_row=False).to_pandas() pd_col = scalars_pandas_df[col] - pd_result = pd_col.apply(lambda_) + if pd.__version__.startswith("2.2"): + pd_result = pd_col.apply(lambda_, by_row=False) + else: + pd_result = pd_col.apply(lambda_) # ignore dtype check, which are Int64 and object respectively assert_series_equal(bf_result, pd_result, check_dtype=False) @@ -3296,7 +3356,11 @@ def foo(x): bf_result = bf_col.apply(foo, by_row=False).to_pandas() pd_col = scalars_pandas_df["int64_col"] - pd_result = pd_col.apply(foo) + + if pd.__version__.startswith("2.2"): + pd_result = pd_col.apply(foo, by_row=False) + else: + pd_result = pd_col.apply(foo) # ignore dtype check, which are Int64 and object respectively assert_series_equal(bf_result, pd_result, check_dtype=False) diff 
--git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index d0cd24e2be..d0c20f3839 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -20,6 +20,7 @@ import typing from typing import List +import google import google.cloud.bigquery as bigquery import numpy as np import pandas as pd @@ -30,9 +31,7 @@ import bigframes.dataframe import bigframes.dtypes import bigframes.ml.linear_model -from tests.system.utils import skip_legacy_pandas - -FIRST_FILE = "000000000000" +from tests.system import utils def test_read_gbq_tokyo( @@ -365,6 +364,47 @@ def test_read_gbq_table_wildcard_with_filter(session: bigframes.Session): assert df.shape == (348485, 32) +@pytest.mark.parametrize( + ("config"), + [ + { + "query": { + "useQueryCache": True, + "maximumBytesBilled": "1000000000", + "timeoutMs": 10000, + } + }, + pytest.param( + {"query": {"useQueryCache": True, "timeoutMs": 50}}, + marks=pytest.mark.xfail( + raises=google.api_core.exceptions.BadRequest, + reason="Expected failure due to timeout being set too short.", + ), + ), + pytest.param( + {"query": {"useQueryCache": False, "maximumBytesBilled": "100"}}, + marks=pytest.mark.xfail( + raises=google.api_core.exceptions.InternalServerError, + reason="Expected failure when the query exceeds the maximum bytes billed limit.", + ), + ), + ], +) +def test_read_gbq_with_configuration( + session: bigframes.Session, scalars_table_id: str, config: dict +): + query = f"""SELECT + t.float64_col * 2 AS my_floats, + CONCAT(t.string_col, "_2") AS my_strings, + t.int64_col > 0 AS my_bools, + FROM `{scalars_table_id}` AS t + """ + + df = session.read_gbq(query, configuration=config) + + assert df.shape == (9, 3) + + def test_read_gbq_model(session, penguins_linear_model_name): model = session.read_gbq_model(penguins_linear_model_name) assert isinstance(model, bigframes.ml.linear_model.LinearRegression) @@ -435,14 +475,14 @@ def test_read_pandas_tokyo( 
pd.testing.assert_frame_equal(result, expected) -@skip_legacy_pandas +@utils.skip_legacy_pandas def test_read_csv_gcs_default_engine(session, scalars_dfs, gcs_folder): scalars_df, _ = scalars_dfs if scalars_df.index.name is not None: path = gcs_folder + "test_read_csv_gcs_default_engine_w_index*.csv" else: path = gcs_folder + "test_read_csv_gcs_default_engine_wo_index*.csv" - read_path = path.replace("*", FIRST_FILE) + read_path = utils.get_first_file_from_wildcard(path) scalars_df.to_csv(path, index=False) dtype = scalars_df.dtypes.to_dict() dtype.pop("geography_col") @@ -492,7 +532,7 @@ def test_read_csv_gcs_bq_engine(session, scalars_dfs, gcs_folder): pytest.param("\t", id="custom_sep"), ], ) -@skip_legacy_pandas +@utils.skip_legacy_pandas def test_read_csv_local_default_engine(session, scalars_dfs, sep): scalars_df, scalars_pandas_df = scalars_dfs with tempfile.TemporaryDirectory() as dir: @@ -641,7 +681,7 @@ def test_read_csv_default_engine_throws_not_implemented_error( gcs_folder + "test_read_csv_gcs_default_engine_throws_not_implemented_error*.csv" ) - read_path = path.replace("*", FIRST_FILE) + read_path = utils.get_first_file_from_wildcard(path) scalars_df_index.to_csv(path) with pytest.raises(NotImplementedError, match=match): session.read_csv(read_path, **kwargs) @@ -649,7 +689,7 @@ def test_read_csv_default_engine_throws_not_implemented_error( def test_read_csv_gcs_default_engine_w_header(session, scalars_df_index, gcs_folder): path = gcs_folder + "test_read_csv_gcs_default_engine_w_header*.csv" - read_path = path.replace("*", FIRST_FILE) + read_path = utils.get_first_file_from_wildcard(path) scalars_df_index.to_csv(path) # Skips header=N rows, normally considers the N+1th row as the header, but overridden by @@ -716,7 +756,7 @@ def test_read_csv_gcs_default_engine_w_index_col_name( session, scalars_df_default_index, gcs_folder ): path = gcs_folder + "test_read_csv_gcs_default_engine_w_index_col_name*.csv" - read_path = path.replace("*", FIRST_FILE) + 
read_path = utils.get_first_file_from_wildcard(path) scalars_df_default_index.to_csv(path) df = session.read_csv(read_path, index_col="rowindex") @@ -731,7 +771,7 @@ def test_read_csv_gcs_default_engine_w_index_col_index( session, scalars_df_default_index, gcs_folder ): path = gcs_folder + "test_read_csv_gcs_default_engine_w_index_col_index*.csv" - read_path = path.replace("*", FIRST_FILE) + read_path = utils.get_first_file_from_wildcard(path) scalars_df_default_index.to_csv(path) index_col = scalars_df_default_index.columns.to_list().index("rowindex") @@ -790,7 +830,7 @@ def test_read_csv_local_default_engine_w_index_col_index( def test_read_csv_gcs_w_usecols(session, scalars_df_index, gcs_folder, engine): path = gcs_folder + "test_read_csv_gcs_w_usecols" path = path + "_default_engine*.csv" if engine is None else path + "_bq_engine*.csv" - read_path = path.replace("*", FIRST_FILE) if engine is None else path + read_path = utils.get_first_file_from_wildcard(path) if engine is None else path scalars_df_index.to_csv(path) # df should only have 1 column which is bool_col. @@ -902,7 +942,7 @@ def test_read_parquet_gcs(session: bigframes.Session, scalars_dfs, gcs_folder, e # Only bigquery engine for reads supports wildcards in path name. 
if engine != "bigquery": - path = path.replace("*", "000000000000") + path = utils.get_first_file_from_wildcard(path) df_out = ( session.read_parquet(path, engine=engine) @@ -1012,7 +1052,7 @@ def test_read_parquet_gcs_compression_not_supported( def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder): scalars_df, _ = scalars_dfs path = gcs_folder + "test_read_json_gcs_bq_engine_w_index*.json" - read_path = path.replace("*", FIRST_FILE) + read_path = utils.get_first_file_from_wildcard(path) scalars_df.to_json(path, index=False, lines=True, orient="records") df = session.read_json(read_path, lines=True, orient="records", engine="bigquery") @@ -1036,7 +1076,7 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder): def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder): scalars_df, _ = scalars_dfs path = gcs_folder + "test_read_json_gcs_default_engine_w_index*.json" - read_path = path.replace("*", FIRST_FILE) + read_path = utils.get_first_file_from_wildcard(path) scalars_df.to_json( path, index=False, diff --git a/tests/system/utils.py b/tests/system/utils.py index 8ea49ed7e2..e40502e6f2 100644 --- a/tests/system/utils.py +++ b/tests/system/utils.py @@ -304,3 +304,7 @@ def delete_cloud_function( request = functions_v2.DeleteFunctionRequest(name=full_name) operation = functions_client.delete_function(request=request) return operation + + +def get_first_file_from_wildcard(path): + return path.replace("*", "000000000000") diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index 1ce70e3da2..cf13084610 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -30,6 +30,7 @@ ("bq_connection", "path/to/connection/1", "path/to/connection/2"), ("use_regional_endpoints", False, True), ("kms_key_name", "kms/key/name/1", "kms/key/name/2"), + ("skip_bq_connection_check", False, True), ], ) def 
test_setter_raises_if_session_started(attribute, original_value, new_value): diff --git a/tests/unit/core/test_blocks.py b/tests/unit/core/test_blocks.py index 0bb5e0101a..8cde187cb3 100644 --- a/tests/unit/core/test_blocks.py +++ b/tests/unit/core/test_blocks.py @@ -82,7 +82,7 @@ def test_block_from_local(data): # hard-coded the returned dimension of the session for that each of the test case contains 3 rows. mock_session._execute.return_value = (iter([[3]]), None) - block = blocks.Block.from_local(data, mock_session) + block = blocks.Block.from_local(pandas.DataFrame(data), mock_session) pandas.testing.assert_index_equal(block.column_labels, expected.columns) assert tuple(block.index.names) == tuple(expected.index.names) diff --git a/tests/unit/ml/test_golden_sql.py b/tests/unit/ml/test_golden_sql.py index d63bc7aaa1..bcb220b107 100644 --- a/tests/unit/ml/test_golden_sql.py +++ b/tests/unit/ml/test_golden_sql.py @@ -105,7 +105,7 @@ def test_linear_regression_default_fit( model.fit(mock_X, mock_y) mock_session._start_query_ml_ddl.assert_called_once_with( - 'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n optimize_strategy="normal_equation",\n fit_intercept=True,\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n early_stop=True,\n min_rel_progress=0.01,\n ls_init_learn_rate=0.1,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql' + 'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n optimize_strategy="auto_strategy",\n fit_intercept=True,\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql' ) @@ -115,7 +115,7 @@ def 
test_linear_regression_params_fit(bqml_model_factory, mock_session, mock_X, model.fit(mock_X, mock_y) mock_session._start_query_ml_ddl.assert_called_once_with( - 'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n optimize_strategy="normal_equation",\n fit_intercept=False,\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n early_stop=True,\n min_rel_progress=0.01,\n ls_init_learn_rate=0.1,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql' + 'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n optimize_strategy="auto_strategy",\n fit_intercept=False,\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql' ) @@ -148,7 +148,7 @@ def test_logistic_regression_default_fit( model.fit(mock_X, mock_y) mock_session._start_query_ml_ddl.assert_called_once_with( - 'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LOGISTIC_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=True,\n auto_class_weights=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql' + 'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LOGISTIC_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=True,\n auto_class_weights=False,\n optimize_strategy="auto_strategy",\n l2_reg=0.0,\n max_iterations=20,\n learn_rate_strategy="line_search",\n min_rel_progress=0.01,\n calculate_p_values=False,\n enable_global_explain=False,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql' ) @@ -156,13 +156,21 @@ def test_logistic_regression_params_fit( bqml_model_factory, mock_session, mock_X, mock_y 
): model = linear_model.LogisticRegression( - fit_intercept=False, class_weights="balanced" + fit_intercept=False, + class_weight="balanced", + l2_reg=0.2, + tol=0.02, + l1_reg=0.2, + max_iterations=30, + optimize_strategy="batch_gradient_descent", + learning_rate_strategy="constant", + learning_rate=0.2, ) model._bqml_model_factory = bqml_model_factory model.fit(mock_X, mock_y) mock_session._start_query_ml_ddl.assert_called_once_with( - 'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LOGISTIC_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=False,\n auto_class_weights=True,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql' + 'CREATE OR REPLACE MODEL `test-project`.`_anon123`.`temp_model_id`\nOPTIONS(\n model_type="LOGISTIC_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=False,\n auto_class_weights=True,\n optimize_strategy="batch_gradient_descent",\n l2_reg=0.2,\n max_iterations=30,\n learn_rate_strategy="constant",\n min_rel_progress=0.02,\n calculate_p_values=False,\n enable_global_explain=False,\n l1_reg=0.2,\n learn_rate=0.2,\n INPUT_LABEL_COLS=["input_column_label"])\nAS input_X_y_sql' ) diff --git a/tests/unit/resources.py b/tests/unit/resources.py index b57cd85360..6846659930 100644 --- a/tests/unit/resources.py +++ b/tests/unit/resources.py @@ -119,7 +119,7 @@ def create_arrayvalue( columns = tuple(ibis_table[key] for key in ibis_table.columns) ordering = bigframes.core.ordering.ExpressionOrdering( tuple( - [core.OrderingColumnReference(column) for column in total_ordering_columns] + [core.orderings.ascending_over(column) for column in total_ordering_columns] ), total_ordering_columns=frozenset(total_ordering_columns), ) diff --git a/tests/unit/test_clients.py b/tests/unit/test_clients.py index f89cc21397..37450ececb 100644 --- a/tests/unit/test_clients.py +++ b/tests/unit/test_clients.py @@ -18,21 +18,21 @@ def test_get_connection_name_full_connection_id(): - connection_name = 
clients.BqConnectionManager.resolve_full_connection_name( + connection_name = clients.resolve_full_bq_connection_name( "connection-id", default_project="default-project", default_location="us" ) assert connection_name == "default-project.us.connection-id" def test_get_connection_name_full_location_connection_id(): - connection_name = clients.BqConnectionManager.resolve_full_connection_name( + connection_name = clients.resolve_full_bq_connection_name( "eu.connection-id", default_project="default-project", default_location="us" ) assert connection_name == "default-project.eu.connection-id" def test_get_connection_name_full_all(): - connection_name = clients.BqConnectionManager.resolve_full_connection_name( + connection_name = clients.resolve_full_bq_connection_name( "my-project.eu.connection-id", default_project="default-project", default_location="us", @@ -42,7 +42,7 @@ def test_get_connection_name_full_all(): def test_get_connection_name_full_raise_value_error(): with pytest.raises(ValueError): - clients.BqConnectionManager.resolve_full_connection_name( + clients.resolve_full_bq_connection_name( "my-project.eu.connection-id.extra_field", default_project="default-project", default_location="us", diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index 5f940fd7a5..0a2fc61418 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -17,7 +17,7 @@ import bigframes.core as core import bigframes.core.expression as ex -import bigframes.core.ordering +import bigframes.core.ordering as order import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops @@ -39,8 +39,8 @@ def test_arrayvalue_constructor_from_ibis_table_adds_all_columns(): ) ibis_table = session.ibis_client.table("test_table") columns = (ibis_table["col1"], ibis_table["col2"], ibis_table["col3"]) - ordering = bigframes.core.ordering.ExpressionOrdering( - tuple([core.OrderingColumnReference("col1")]), + ordering = order.ExpressionOrdering( + 
tuple([order.ascending_over("col1")]), total_ordering_columns=frozenset(["col1"]), ) actual = core.ArrayValue.from_ibis( diff --git a/tests/unit/test_pandas.py b/tests/unit/test_pandas.py index d6af223456..535b748345 100644 --- a/tests/unit/test_pandas.py +++ b/tests/unit/test_pandas.py @@ -21,6 +21,7 @@ import pytest import bigframes.core.global_session +import bigframes.dataframe import bigframes.pandas as bpd import bigframes.session @@ -67,7 +68,11 @@ def test_method_matches_session(method_name: str): # Add `eval_str = True` so that deferred annotations are turned into their # corresponding type objects. Need Python 3.10 for eval_str parameter. - session_signature = inspect.signature(session_method, eval_str=True) + session_signature = inspect.signature( + session_method, + eval_str=True, + globals={**vars(bigframes.session), **{"dataframe": bigframes.dataframe}}, + ) pandas_signature = inspect.signature(pandas_method, eval_str=True) assert [ # Kind includes position, which will be an offset. diff --git a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py index 4f7e33909e..bd5f055ece 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py @@ -33,6 +33,67 @@ def strftime(self, date_format: str): Date format string (e.g. "%Y-%m-%d"). Returns: - bigframes.series.Series of formatted strings. + bigframes.series.Series: Series of formatted strings. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def normalize(self): + """ + Convert times to midnight. + + The time component of the date-time is converted to midnight i.e. + 00:00:00. This is useful in cases when the time does not matter. + The return dtype will match the source series. + + This method is available on Series with datetime values under the + .dt accessor. 
+ + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> s = bpd.Series(pd.date_range( + ... start='2014-08-01 10:00', + ... freq='h', + ... periods=3, + ... tz='Asia/Calcutta')) # note timezones will be converted to UTC here + >>> s.dt.normalize() + 0 2014-08-01 00:00:00+00:00 + 1 2014-08-01 00:00:00+00:00 + 2 2014-08-01 00:00:00+00:00 + dtype: timestamp[us, tz=UTC][pyarrow] + + Returns: + bigframes.series.Series of the same dtype as the data. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def floor(self, freq: str): + """ + Perform floor operation on the data to the specified freq. + + Supported freq arguments are: 'Y' (year), 'Q' (quarter), 'M' + (month), 'W' (week), 'D' (day), 'h' (hour), 'min' (minute), 's' + (second), 'ms' (millisecond), 'us' (microsecond), 'ns' (nanosecond) + + Behavior around clock changes (i.e. daylight savings) is determined + by the SQL engine, so "ambiguous" and "nonexistent" parameters are not + supported. Y, Q, M, and W freqs are not supported by pandas as of + version 2.2, but have been added here due to backend support. + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') + >>> bpd.Series(rng).dt.floor("h") + 0 2018-01-01 11:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: timestamp[us][pyarrow] + + Args: + freq (str): + Frequency string (e.g. "D", "min", "s"). 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 4eceb8a2f1..d70d3827e7 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -13,7 +13,7 @@ from typing import Hashable, Iterable, Literal, Mapping, Optional, Sequence, Union -from bigframes_vendored.pandas.core.generic import NDFrame +import bigframes_vendored.pandas.core.generic as generic import numpy as np import pandas as pd @@ -23,7 +23,7 @@ # DataFrame class -class DataFrame(NDFrame): +class DataFrame(generic.NDFrame): """Two-dimensional, size-mutable, potentially heterogeneous tabular data. Data structure also contains labeled axes (rows and columns). @@ -592,7 +592,7 @@ def to_records( >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_records() rec.array([(0, 1, 3), (1, 2, 4)], - dtype=[('index', 'O'), ('col1', 'O'), ('col2', 'O')]) + dtype=[('index', '>> import bigframes.pandas as bpd @@ -1641,6 +1638,9 @@ def keys(self): ... }) >>> df.keys() Index(['A', 'B'], dtype='object') + + Returns: + Index: Info axis. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1673,6 +1673,17 @@ def itertuples(self, index: bool = True, name: str | None = "Pandas"): """ Iterate over DataFrame rows as namedtuples. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame({ + ... 'A': [1, 2, 3], + ... 'B': [4, 5, 6], + ... }) + >>> next(df.itertuples(name="Pair")) + Pair(Index=0, A=1, B=4) + Args: index (bool, default True): If True, return the index as the first element of the tuple. 
@@ -1685,18 +1696,6 @@ def itertuples(self, index: bool = True, name: str | None = "Pandas"): An object to iterate over namedtuples for each row in the DataFrame with the first field possibly being the index and following fields being the column values. - - - **Examples:** - - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({ - ... 'A': [1, 2, 3], - ... 'B': [4, 5, 6], - ... }) - >>> next(df.itertuples(name="Pair")) - Pair(Index=0, A=1, B=4) """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2835,10 +2834,38 @@ def corr(self, method, min_periods, numeric_only) -> DataFrame: Include only float, int, boolean, decimal data. Returns: - DataFrame: Correlation matrix. + DataFrame: Correlation matrix. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def cov(self, *, numeric_only) -> DataFrame: + """ + Compute pairwise covariance of columns, excluding NA/null values. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'A': [1, 2, 3], + ... 'B': [400, 500, 600], + ... 'C': [0.8, 0.4, 0.9]}) + >>> df.cov(numeric_only=True) + A B C + A 1.0 100.0 0.05 + B 100.0 10000.0 5.0 + C 0.05 5.0 0.07 + + [3 rows x 3 columns] + + Args: + numeric_only(bool, default False): + Include only float, int, boolean, decimal data. + + Returns: + DataFrame: The covariance matrix of the series of the DataFrame. + """ + def update( self, other, join: str = "left", overwrite: bool = True, filter_func=None ) -> DataFrame: @@ -3155,7 +3182,7 @@ def join(self, other, *, on: Optional[str] = None, how: str) -> DataFrame: on: Column in the caller to join on the index in other, otherwise joins index-on-index. Like an Excel VLOOKUP operation. - how ({'left', 'right', 'outer', 'inner'}, default 'left'`): + how ({'left', 'right', 'outer', 'inner'}, default 'left'): How to handle the operation of the two objects. 
``left``: use calling frame's index (or column if on is specified) ``right``: use `other`'s index. ``outer``: form union of calling @@ -4403,7 +4430,7 @@ def cumprod(self) -> DataFrame: def diff( self, periods: int = 1, - ) -> NDFrame: + ) -> generic.NDFrame: """First discrete difference of element. Calculates the difference of a DataFrame element compared with another @@ -4751,7 +4778,7 @@ def index(self): >>> df.index # doctest: +ELLIPSIS Index([10, 20, 30], dtype='Int64') >>> df.index.values - array([10, 20, 30], dtype=object) + array([10, 20, 30]) Let's try setting a new index for the dataframe and see that reflect via ``index`` property. diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 9358dca17b..04cc3990a4 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -6,7 +6,7 @@ from bigframes_vendored.pandas.core import indexing import bigframes_vendored.pandas.core.common as common -from bigframes import constants +import bigframes.constants as constants if TYPE_CHECKING: from bigframes_vendored.pandas.pandas._typing import T @@ -123,28 +123,55 @@ def astype(self, dtype): Create a series of type ``Int64``: - >>> ser = bpd.Series([1, 2], dtype='Int64') + >>> ser = bpd.Series([2023010000246789, 1624123244123101, 1054834234120101], dtype='Int64') >>> ser - 0 1 - 1 2 + 0 2023010000246789 + 1 1624123244123101 + 2 1054834234120101 dtype: Int64 Convert to ``Float64`` type: >>> ser.astype('Float64') - 0 1.0 - 1 2.0 + 0 2023010000246789.0 + 1 1624123244123101.0 + 2 1054834234120101.0 dtype: Float64 + Convert to ``pd.ArrowDtype(pa.timestamp("us", tz="UTC"))`` type: + + >>> ser.astype("timestamp[us, tz=UTC][pyarrow]") + 0 2034-02-08 11:13:20.246789+00:00 + 1 2021-06-19 17:20:44.123101+00:00 + 2 2003-06-05 17:30:34.120101+00:00 + dtype: timestamp[us, tz=UTC][pyarrow] + + Note that this is equivalent of using 
``to_datetime`` with ``unit='us'``: + + >>> bpd.to_datetime(ser, unit='us', utc=True) + 0 2034-02-08 11:13:20.246789+00:00 + 1 2021-06-19 17:20:44.123101+00:00 + 2 2003-06-05 17:30:34.120101+00:00 + dtype: timestamp[us, tz=UTC][pyarrow] + + Convert ``pd.ArrowDtype(pa.timestamp("us", tz="UTC"))`` type to ``Int64`` type: + + >>> timestamp_ser = ser.astype("timestamp[us, tz=UTC][pyarrow]") + >>> timestamp_ser.astype('Int64') + 0 2023010000246789 + 1 1624123244123101 + 2 1054834234120101 + dtype: Int64 + Args: dtype (str or pandas.ExtensionDtype): - A dtype supported by BigQuery DataFrame include 'boolean','Float64','Int64', - 'string', 'string[pyarrow]','timestamp[us, tz=UTC][pyarrow]', - 'timestamp[us][pyarrow]','date32[day][pyarrow]','time64[us][pyarrow]' - A pandas.ExtensionDtype include pandas.BooleanDtype(), pandas.Float64Dtype(), - pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"), - pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")), - pd.ArrowDtype(pa.timestamp("us")), pd.ArrowDtype(pa.timestamp("us", tz="UTC")). + A dtype supported by BigQuery DataFrame include ``'boolean'``, ``'Float64'``, ``'Int64'``, + ``'int64[pyarrow]'``, ``'string'``, ``'string[pyarrow]'``, ``'timestamp[us, tz=UTC][pyarrow]'``, + ``'timestamp\[us\]\[pyarrow\]'``, ``'date32\[day\]\[pyarrow\]'``, ``'time64\[us\]\[pyarrow\]'``. + A pandas.ExtensionDtype include ``pandas.BooleanDtype()``, ``pandas.Float64Dtype()``, + ``pandas.Int64Dtype()``, ``pandas.StringDtype(storage="pyarrow")``, + ``pd.ArrowDtype(pa.date32())``, ``pd.ArrowDtype(pa.time64("us"))``, + ``pd.ArrowDtype(pa.timestamp("us"))``, ``pd.ArrowDtype(pa.timestamp("us", tz="UTC"))``. Returns: same type as caller @@ -472,6 +499,7 @@ def sample( frac: Optional[float] = None, *, random_state: Optional[int] = None, + sort: Optional[bool | Literal["random"]] = "random", ): """Return a random sample of items from an axis of object. @@ -530,6 +558,12 @@ def sample( Fraction of axis items to return. Cannot be used with `n`. 
random_state (Optional[int], default None): Seed for random number generator. + sort (Optional[bool|Literal["random"]], default "random"): + + - 'random' (default): No specific ordering will be applied after + sampling. + - 'True' : Index columns will determine the sample's order. + - 'False': The sample will retain the original object's order. Returns: A new object of same type as caller containing `n` items randomly @@ -756,12 +790,12 @@ def isna(self) -> NDFrame: >>> ser = bpd.Series([5, None, 6, np.nan, bpd.NA]) >>> ser - 0 5.0 + 0 5 1 - 2 6.0 + 2 6 3 4 - dtype: Float64 + dtype: Int64 >>> ser.isna() 0 False diff --git a/third_party/bigframes_vendored/pandas/core/indexing.py b/third_party/bigframes_vendored/pandas/core/indexing.py index fae5d6261f..3c7f8a6c9f 100644 --- a/third_party/bigframes_vendored/pandas/core/indexing.py +++ b/third_party/bigframes_vendored/pandas/core/indexing.py @@ -1,6 +1,6 @@ # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/indexing.py -from bigframes import constants +import bigframes.constants as constants class IndexingMixin: diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 7120c4d155..0aebd0660f 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -71,7 +71,7 @@ def index(self): >>> s.index # doctest: +ELLIPSIS Index([10, 20, 30], dtype='Int64') >>> s.index.values - array([10, 20, 30], dtype=object) + array([10, 20, 30]) Let's try setting a multi-index case reflect via ``index`` property. 
@@ -2035,11 +2035,11 @@ def add(self, other) -> Series: >>> a = bpd.Series([1, 2, 3, bpd.NA]) >>> a - 0 1.0 - 1 2.0 - 2 3.0 + 0 1 + 1 2 + 2 3 3 - dtype: Float64 + dtype: Int64 >>> b = bpd.Series([10, 20, 30, 40]) >>> b @@ -2050,20 +2050,20 @@ def add(self, other) -> Series: dtype: Int64 >>> a.add(b) - 0 11.0 - 1 22.0 - 2 33.0 + 0 11 + 1 22 + 2 33 3 - dtype: Float64 + dtype: Int64 You can also use the mathematical operator ``+``: >>> a + b - 0 11.0 - 1 22.0 - 2 33.0 + 0 11 + 1 22 + 2 33 3 - dtype: Float64 + dtype: Int64 Adding two Series with explicit indexes: @@ -2371,12 +2371,12 @@ def max( >>> s = bpd.Series([1, 3, bpd.NA]) >>> s - 0 1.0 - 1 3.0 + 0 1 + 1 3 2 - dtype: Float64 + dtype: Int64 >>> s.max() - 3.0 + 3 Returns: scalar: Scalar. @@ -2411,12 +2411,12 @@ def min( >>> s = bpd.Series([1, 3, bpd.NA]) >>> s - 0 1.0 - 1 3.0 + 0 1 + 1 3 2 - dtype: Float64 + dtype: Int64 >>> s.min() - 1.0 + 1 Returns: scalar: Scalar. @@ -2498,12 +2498,12 @@ def sum(self): >>> s = bpd.Series([1, 3, bpd.NA]) >>> s - 0 1.0 - 1 3.0 + 0 1 + 1 3 2 - dtype: Float64 + dtype: Int64 >>> s.sum() - 4.0 + 4 Returns: scalar: Scalar. 
@@ -2532,10 +2532,10 @@ def mean(self): >>> s = bpd.Series([1, 3, bpd.NA]) >>> s - 0 1.0 - 1 3.0 + 0 1 + 1 3 2 - dtype: Float64 + dtype: Int64 >>> s.mean() 2.0 @@ -3305,7 +3305,7 @@ def values(self): >>> bpd.options.display.progress_bar = None >>> bpd.Series([1, 2, 3]).values - array([1, 2, 3], dtype=object) + array([1, 2, 3]) >>> bpd.Series(list('aabc')).values array(['a', 'a', 'b', 'c'], dtype=object) diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index 74602b5af1..b5feeb13c5 100644 --- a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -3,7 +3,7 @@ from __future__ import annotations -from typing import Any, Iterable, Literal, Optional, Tuple, Union +from typing import Any, Dict, Iterable, Literal, Optional, Tuple, Union from bigframes import constants @@ -19,9 +19,10 @@ def read_gbq( *, index_col: Iterable[str] | str = (), columns: Iterable[str] = (), + configuration: Optional[Dict] = None, max_results: Optional[int] = None, filters: FiltersType = (), - use_cache: bool = True, + use_cache: Optional[bool] = None, col_order: Iterable[str] = (), ): """Loads a DataFrame from BigQuery. @@ -107,6 +108,11 @@ def read_gbq( columns (Iterable[str]): List of BigQuery column names in the desired order for results DataFrame. + configuration (dict, optional): + Query config parameters for job processing. + For example: configuration = {'query': {'useQueryCache': False}}. + For more information see `BigQuery REST API Reference + `__. max_results (Optional[int], default None): If set, limit the maximum number of rows to fetch from the query results. @@ -121,8 +127,10 @@ def read_gbq( If using wildcard table suffix in query_or_table, can specify '_table_suffix' pseudo column to filter the tables to be read into the DataFrame. - use_cache (bool, default True): - Whether to cache the query inputs. Default to True. 
+ use_cache (Optional[bool], default None): + Caches query results if set to `True`. When `None`, it behaves + as `True`, but should not be combined with `useQueryCache` in + `configuration` to avoid conflicts. col_order (Iterable[str]): Alias for columns, retained for backwards compatibility. diff --git a/third_party/bigframes_vendored/pandas/plotting/_core.py b/third_party/bigframes_vendored/pandas/plotting/_core.py index d901f41ef8..f8da9efdc0 100644 --- a/third_party/bigframes_vendored/pandas/plotting/_core.py +++ b/third_party/bigframes_vendored/pandas/plotting/_core.py @@ -266,10 +266,6 @@ def scatter( - A single color string referred to by name, RGB or RGBA code, for instance 'red' or '#a98d19'. - - A sequence of color strings referred to by name, RGB or RGBA - code, which will be used for each point's color recursively. For - instance ['green','yellow'] all points will be filled in green or - yellow, alternatively. - A column name or position whose values will be used to color the marker points according to a colormap. diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py index d72b9b7bd5..2a0acc8cfe 100644 --- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py +++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py @@ -31,6 +31,34 @@ class KMeans(_BaseKMeans): n_clusters (int, default 8): The number of clusters to form as well as the number of centroids to generate. Default to 8. + + init ("kmeans++", "random" or "custom", default "kmeans++"): + The method of initializing the clusters. Default to "kmeans++" + + kmeans++: Initializes a number of centroids equal to the n_clusters value by using the k-means++ algorithm. Using this approach usually trains a better model than using random cluster initialization. + random: Initializes the centroids by randomly selecting a number of data points equal to the n_clusters value from the input data. 
+ custom: Initializes the centroids using a provided column of type bool. Uses the rows with a value of True as the initial centroids. You specify the column to use by using the init_col option. + + init_col (str or None, default None): + The name of the column to use to initialize the centroids. This column must have a type of bool. If this column contains a value of True for a given row, then uses that row as an initial centroid. The number of True rows in this column must be equal to the value you have specified for the n_clusters option. + Only works with init method "custom". Default to None. + + distance_type ("euclidean" or "cosine", default "euclidean"): + The type of metric to use to compute the distance between two points. + Default to "euclidean". + + max_iter (int, default 20): + The maximum number of training iterations, where one iteration represents a single pass of the entire training data. Default to 20. + + tol (float, default 0.01): + The minimum relative loss improvement that is necessary to continue training. For example, a value of 0.01 specifies that each iteration must reduce the loss by 1% for training to continue. + Default to 0.01. + + warm_start (bool, default False): + Determines whether to train a model with new training data, new model options, or both. Unless you explicitly override them, the initial options used to train the model are used for the warm start run. + Default to False. + + """ def fit( diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py index 30c9c3b0b6..dcce75d1d9 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py @@ -20,21 +20,13 @@ class PCA(BaseEstimator, metaclass=ABCMeta): """Principal component analysis (PCA). - Linear dimensionality reduction using Singular Value Decomposition of the - data to project it to a lower dimensional space. 
The input data is centered - but not scaled for each feature before applying the SVD. - - It uses the LAPACK implementation of the full SVD or a randomized truncated - SVD by the method of Halko et al. 2009, depending on the shape of the input - data and the number of components to extract. - - It can also use the scipy.sparse.linalg ARPACK implementation of the - truncated SVD. - Args: - n_components (Optional[int], default 3): - Number of components to keep. if n_components is not set all components - are kept. + n_components (int, float or None, default None): + Number of components to keep. + If n_components is not set all components are kept. n_components = min(n_samples, n_features). + If 0 < n_components < 1, select the number of components such that the amount of variance that needs to be explained is greater than the percentage specified by n_components. + svd_solver ("full", "randomized" or "auto", default "auto"): + The solver to use to calculate the principal components. Details: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-pca#pca_solver. """ diff --git a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py index 63c62274fd..53a211dd7f 100644 --- a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py +++ b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py @@ -91,7 +91,7 @@ class RandomForestRegressor(ForestRegressor): to improve the predictive accuracy and control over-fitting. Args: - num_parallel_tree (Optional[int]): + n_estimators (Optional[int]): Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2. tree_method (Optional[str]): Specify which tree method to use. Default to "auto". If this parameter is set to @@ -116,10 +116,8 @@ class RandomForestRegressor(ForestRegressor): L1 regularization term on weights (xgb's alpha). Default to 0.0. 
reg_lambda (Optional[float]): L2 regularization term on weights (xgb's lambda). Default to 1.0. - early_stop (Optional[bool]): - Whether training should stop after the first iteration. Default to True. - min_rel_progress (Optional[float]): - Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01. + tol (Optional[float]): + Minimum relative loss improvement necessary to continue training. Default to 0.01. enable_global_explain (Optional[bool]): Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False. xgboost_version (Optional[str]): @@ -158,7 +156,7 @@ class RandomForestClassifier(ForestClassifier): improve the predictive accuracy and control over-fitting. Args: - num_parallel_tree (Optional[int]): + n_estimators (Optional[int]): Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2. tree_method (Optional[str]): Specify which tree method to use. Default to "auto". If this parameter is set to @@ -183,10 +181,8 @@ class RandomForestClassifier(ForestClassifier): L1 regularization term on weights (xgb's alpha). Default to 0.0. reg_lambda (Optional[float]): L2 regularization term on weights (xgb's lambda). Default to 1.0. - early_stop (Optional[bool]): - Whether training should stop after the first iteration. Default to True. - min_rel_progress (Optional[float]): - Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01. + tol (Optional[float]): + Minimum relative loss improvement necessary to continue training. Default to 0.01. enable_global_explain (Optional[bool]): Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False. 
xgboost_version (Optional[str]): diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_base.py b/third_party/bigframes_vendored/sklearn/linear_model/_base.py index ad2c872468..a845b782c0 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_base.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_base.py @@ -63,26 +63,30 @@ class LinearRegression(RegressorMixin, LinearModel): the dataset, and the targets predicted by the linear approximation. Args: - optimize_strategy (str, default "normal_equation"): + optimize_strategy (str, default "auto_strategy"): The strategy to train linear regression models. Possible values are "auto_strategy", "batch_gradient_descent", "normal_equation". Default - to "normal_equation". + to "auto_strategy". fit_intercept (bool, default True): Default ``True``. Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered). + l1_reg (float or None, default None): + The amount of L1 regularization applied. Default to None. Can't be set in "normal_equation" mode. If unset, value 0 is used. l2_reg (float, default 0.0): The amount of L2 regularization applied. Default to 0. max_iterations (int, default 20): The maximum number of training iterations or steps. Default to 20. - learn_rate_strategy (str, default "line_search"): + warm_start (bool, default False): + Determines whether to train a model with new training data, new model options, or both. Unless you explicitly override them, the initial options used to train the model are used for the warm start run. Default to False. + learning_rate (float or None, default None): + The learn rate for gradient descent when learning_rate_strategy='constant'. If unset, value 0.1 is used. If learning_rate_strategy='line_search', an error is returned. + learning_rate_strategy (str, default "line_search"): The strategy for specifying the learning rate during training. 
Default to "line_search". - early_stop (bool, default True): - Whether training should stop after the first iteration in which the relative loss improvement is less than the value specified for min_rel_progress. Default to True. - min_rel_progress (float, default 0.01): + tol (float, default 0.01): The minimum relative loss improvement that is necessary to continue training when EARLY_STOP is set to true. For example, a value of 0.01 specifies that each iteration must reduce the loss by 1% for training to continue. Default to 0.01. - ls_init_learn_rate (float, default 0.1): - Sets the initial learning rate that learn_rate_strategy='line_search' uses. This option can only be used if line_search is specified. Default to 0.1. + ls_init_learning_rate (float or None, default None): + Sets the initial learning rate that learning_rate_strategy='line_search' uses. This option can only be used if line_search is specified. If unset, value 0.1 is used. calculate_p_values (bool, default False): Specifies whether to compute p-values and standard errors during training. Default to False. enable_global_explain (bool, default False): diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py index 621c78d551..88ff32ea06 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py @@ -24,10 +24,14 @@ class LogisticRegression(LinearClassifierMixin, BaseEstimator): """Logistic Regression (aka logit, MaxEnt) classifier. Args: + optimize_strategy (str, default "auto_strategy"): + The strategy to train logistic regression models. Possible values are + "auto_strategy", "batch_gradient_descent", "normal_equation". Default + to "auto_strategy". fit_intercept (default True): Default True. Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function. 
- class_weights (dict or 'balanced', default None): + class_weight (dict or 'balanced', default None): Default None. Weights associated with classes in the form ``{class_label: weight}``.If not given, all classes are supposed to have weight one. The "balanced" mode uses the values of y to @@ -35,6 +39,26 @@ class LogisticRegression(LinearClassifierMixin, BaseEstimator): frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Dict isn't supported now. + l1_reg (float or None, default None): + The amount of L1 regularization applied. Default to None. Can't be set in "normal_equation" mode. If unset, value 0 is used. + l2_reg (float, default 0.0): + The amount of L2 regularization applied. Default to 0. + max_iterations (int, default 20): + The maximum number of training iterations or steps. Default to 20. + warm_start (bool, default False): + Determines whether to train a model with new training data, new model options, or both. Unless you explicitly override them, the initial options used to train the model are used for the warm start run. Default to False. + learning_rate (float or None, default None): + The learn rate for gradient descent when learning_rate_strategy='constant'. If unset, value 0.1 is used. If learning_rate_strategy='line_search', an error is returned. + learning_rate_strategy (str, default "line_search"): + The strategy for specifying the learning rate during training. Default to "line_search". + tol (float, default 0.01): + The minimum relative loss improvement that is necessary to continue training when EARLY_STOP is set to true. For example, a value of 0.01 specifies that each iteration must reduce the loss by 1% for training to continue. Default to 0.01. + ls_init_learning_rate (float or None, default None): + Sets the initial learning rate that learning_rate_strategy='line_search' uses. This option can only be used if line_search is specified. If unset, value 0.1 is used. 
+ calculate_p_values (bool, default False): + Specifies whether to compute p-values and standard errors during training. Default to False. + enable_global_explain (bool, default False): + Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False. """ def fit( diff --git a/third_party/bigframes_vendored/sklearn/metrics/_classification.py b/third_party/bigframes_vendored/sklearn/metrics/_classification.py index a9d8038e59..00bbf8cd60 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/_classification.py +++ b/third_party/bigframes_vendored/sklearn/metrics/_classification.py @@ -26,6 +26,24 @@ def accuracy_score(y_true, y_pred, normalize=True) -> float: """Accuracy classification score. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.ml.metrics + >>> bpd.options.display.progress_bar = None + + >>> y_true = bpd.DataFrame([0, 2, 1, 3]) + >>> y_pred = bpd.DataFrame([0, 1, 2, 3]) + >>> accuracy_score = bigframes.ml.metrics.accuracy_score(y_true, y_pred) + >>> accuracy_score + 0.5 + + If False, return the number of correctly classified samples: + + >>> accuracy_score = bigframes.ml.metrics.accuracy_score(y_true, y_pred, normalize=False) + >>> accuracy_score + 2 + Args: y_true (Series or DataFrame of shape (n_samples,)): Ground truth (correct) labels. @@ -58,6 +76,30 @@ def confusion_matrix( :math:`C_{0,0}`, false negatives is :math:`C_{1,0}`, true positives is :math:`C_{1,1}` and false positives is :math:`C_{0,1}`. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.ml.metrics + >>> bpd.options.display.progress_bar = None + + >>> y_true = bpd.DataFrame([2, 0, 2, 2, 0, 1]) + >>> y_pred = bpd.DataFrame([0, 0, 2, 2, 0, 2]) + >>> confusion_matrix = bigframes.ml.metrics.confusion_matrix(y_true, y_pred) + >>> confusion_matrix + 0 1 2 + 0 2 0 0 + 1 0 0 1 + 2 1 0 2 + + >>> y_true = bpd.DataFrame(["cat", "ant", "cat", "cat", "ant", "bird"]) + >>> y_pred = bpd.DataFrame(["ant", "ant", "cat", "cat", "ant", "cat"]) + >>> confusion_matrix = bigframes.ml.metrics.confusion_matrix(y_true, y_pred) + >>> confusion_matrix + ant bird cat + ant 2 0 0 + bird 0 0 1 + cat 1 0 2 + Args: y_true (Series or DataFrame of shape (n_samples,)): Ground truth (correct) target values. @@ -86,6 +128,22 @@ def recall_score( The best value is 1 and the worst value is 0. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.ml.metrics + >>> bpd.options.display.progress_bar = None + + >>> y_true = bpd.DataFrame([0, 1, 2, 0, 1, 2]) + >>> y_pred = bpd.DataFrame([0, 2, 1, 0, 0, 1]) + >>> recall_score = bigframes.ml.metrics.recall_score(y_true, y_pred, average=None) + >>> recall_score + 0 1 + 1 0 + 2 0 + dtype: int64 + + Args: y_true (Series or DataFrame of shape (n_samples,)): Ground truth (correct) target values. @@ -95,6 +153,7 @@ def recall_score( default='binary'): This parameter is required for multiclass/multilabel targets. Possible values are 'None', 'micro', 'macro', 'samples', 'weighted', 'binary'. + Only average=None is supported. Returns: float (if average is not None) or Series of float of shape n_unique_labels,): Recall @@ -118,6 +177,21 @@ def precision_score( The best value is 1 and the worst value is 0. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.ml.metrics + >>> bpd.options.display.progress_bar = None + + >>> y_true = bpd.DataFrame([0, 1, 2, 0, 1, 2]) + >>> y_pred = bpd.DataFrame([0, 2, 1, 0, 0, 1]) + >>> precision_score = bigframes.ml.metrics.precision_score(y_true, y_pred, average=None) + >>> precision_score + 0 0.666667 + 1 0.000000 + 2 0.000000 + dtype: float64 + Args: y_true: Series or DataFrame of shape (n_samples,) Ground truth (correct) target values. @@ -127,6 +201,7 @@ def precision_score( default='binary' This parameter is required for multiclass/multilabel targets. Possible values are 'None', 'micro', 'macro', 'samples', 'weighted', 'binary'. + Only average=None is supported. Returns: precision: float (if average is not None) or Series of float of shape \ @@ -153,6 +228,21 @@ def f1_score( the F1 score of each class with weighting depending on the ``average`` parameter. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.ml.metrics + >>> bpd.options.display.progress_bar = None + + >>> y_true = bpd.DataFrame([0, 1, 2, 0, 1, 2]) + >>> y_pred = bpd.DataFrame([0, 2, 1, 0, 0, 1]) + >>> f1_score = bigframes.ml.metrics.f1_score(y_true, y_pred, average=None) + >>> f1_score + 0 0.8 + 1 0.0 + 2 0.0 + dtype: float64 + Args: y_true: Series or DataFrame of shape (n_samples,) Ground truth (correct) target values. diff --git a/third_party/bigframes_vendored/xgboost/sklearn.py b/third_party/bigframes_vendored/xgboost/sklearn.py index dfd0ba7356..250e34dc2c 100644 --- a/third_party/bigframes_vendored/xgboost/sklearn.py +++ b/third_party/bigframes_vendored/xgboost/sklearn.py @@ -55,7 +55,7 @@ class XGBRegressor(XGBModel, XGBRegressorBase): XGBoost regression model. Args: - num_parallel_tree (Optional[int]): + n_estimators (Optional[int]): Number of parallel trees constructed during each iteration. Default to 1. booster (Optional[str]): Specify which booster to use: gbtree or dart. Default to "gbtree". 
@@ -84,14 +84,12 @@ class XGBRegressor(XGBModel, XGBRegressorBase): L1 regularization term on weights (xgb's alpha). Default to 0.0. reg_lambda (Optional[float]): L2 regularization term on weights (xgb's lambda). Default to 1.0. - early_stop (Optional[bool]): - Whether training should stop after the first iteration. Default to True. learning_rate (Optional[float]): Boosting learning rate (xgb's "eta"). Default to 0.3. max_iterations (Optional[int]): Maximum number of rounds for boosting. Default to 20. - min_rel_progress (Optional[float]): - Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01. + tol (Optional[float]): + Minimum relative loss improvement necessary to continue training. Default to 0.01. enable_global_explain (Optional[bool]): Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False. xgboost_version (Optional[str]): @@ -104,7 +102,7 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase): XGBoost classifier model. Args: - num_parallel_tree (Optional[int]): + n_estimators (Optional[int]): Number of parallel trees constructed during each iteration. Default to 1. booster (Optional[str]): Specify which booster to use: gbtree or dart. Default to "gbtree". @@ -133,14 +131,12 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase): L1 regularization term on weights (xgb's alpha). Default to 0.0. reg_lambda (Optional[float]): L2 regularization term on weights (xgb's lambda). Default to 1.0. - early_stop (Optional[bool]): - Whether training should stop after the first iteration. Default to True. learning_rate (Optional[float]): Boosting learning rate (xgb's "eta"). Default to 0.3. max_iterations (Optional[int]): Maximum number of rounds for boosting. Default to 20. 
- min_rel_progress (Optional[float]): - Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01. + tol (Optional[float]): + Minimum relative loss improvement necessary to continue training. Default to 0.01. enable_global_explain (Optional[bool]): Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False. xgboost_version (Optional[str]):