From 7a196b33e222b49ac4996b210c6bbbff4eea8000 Mon Sep 17 00:00:00 2001 From: ronanstokes-db Date: Wed, 5 Mar 2025 18:20:56 -0800 Subject: [PATCH 1/4] modified files to build for Databricks runtime 11.3 LTS compliant versions --- .github/workflows/push.yml | 4 ++-- .github/workflows/release.yml | 4 ++-- CHANGELOG.md | 7 +++++++ CONTRIBUTING.md | 17 +++++++---------- README.md | 6 +++--- makefile | 4 ++-- python/dev_require.txt | 18 +++++++++--------- python/require.txt | 18 +++++++++--------- 8 files changed, 41 insertions(+), 37 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index be1be909..936414f0 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -31,10 +31,10 @@ jobs: sudo update-alternatives --set java /usr/lib/jvm/temurin-8-jdk-amd64/bin/java java -version - - name: Set up Python 3.8 + - name: Set up Python 3.9.21 uses: actions/setup-python@v5 with: - python-version: '3.8.12' + python-version: '3.9.21' cache: 'pipenv' - name: Check Python version diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 77d81630..26761f9c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,10 +24,10 @@ jobs: sudo update-alternatives --set java /usr/lib/jvm/temurin-8-jdk-amd64/bin/java java -version - - name: Set up Python 3.8 + - name: Set up Python 3.9.21 uses: actions/setup-python@v5 with: - python-version: '3.8.12' + python-version: '3.9.21' cache: 'pipenv' - name: Check Python version diff --git a/CHANGELOG.md b/CHANGELOG.md index dc4229e2..f90e8fff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,13 @@ All notable changes to the Databricks Labs Data Generator will be documented in #### Fixed * Updated build scripts to use Ubuntu 22.04 to correspond to environment in Databricks runtime +#### Changed +* Changed base Databricks runtime version to DBR 11.3 LTS (based on Apache Spark 3.3.0) + +#### Added +* Added support for serialization to/from JSON format + + ### Version 0.4.0 Hotfix 2 #### Fixed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e56b7617..a8bdcc8f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,10 +19,7 @@ Dependent packages are not installed automatically by the `dbldatagen` package. ## Python compatibility -The code has been tested with Python 3.8.12 and later. - -Older releases were tested with Python 3.7.5 but as of this release, it requires the Databricks -runtime 9.1 LTS or later. +The code has been tested with Python 3.9.21 and later. ## Checking your code for common issues @@ -46,7 +43,7 @@ Our recommended mechanism for building the code is to use a `conda` or `pipenv` But it can be built with any Python virtualization environment. ### Spark dependencies -The builds have been tested against Spark 3.2.1. This requires the OpenJDK 1.8.56 or later version of Java 8. +The builds have been tested against Spark 3.3.0. This requires the OpenJDK 1.8.56 or later version of Java 8. The Databricks runtimes use the Azul Zulu version of OpenJDK 8 and we have used these in local testing. These are not installed automatically by the build process, so you will need to install them separately. 
@@ -75,7 +72,7 @@ To build with `pipenv`, perform the following commands: - Run `make dist` from the main project directory - The resulting wheel file will be placed in the `dist` subdirectory -The resulting build has been tested against Spark 3.2.1 +The resulting build has been tested against Spark 3.3.0 ## Creating the HTML documentation @@ -161,19 +158,19 @@ See https://legacy.python.org/dev/peps/pep-0008/ # Github expectations When running the unit tests on Github, the environment should use the same environment as the latest Databricks -runtime latest LTS release. While compatibility is preserved on LTS releases from Databricks runtime 10.4 onwards, +runtime latest LTS release. While compatibility is preserved on LTS releases from Databricks runtime 11.3 onwards, unit tests will be run on the environment corresponding to the latest LTS release. -Libraries will use the same versions as the earliest supported LTS release - currently 10.4 LTS +Libraries will use the same versions as the earliest supported LTS release - currently 11.3 LTS This means for the current build: - Use of Ubuntu 22.04 for the test runner - Use of Java 8 -- Use of Python 3.11 +- Use of Python 3.9.21 when testing / building the image See the following resources for more information = https://docs.databricks.com/en/release-notes/runtime/15.4lts.html -- https://docs.databricks.com/en/release-notes/runtime/10.4lts.html +- https://docs.databricks.com/en/release-notes/runtime/11.3lts.html - https://github.com/actions/runner-images/issues/10636 diff --git a/README.md b/README.md index 1a6b0738..3c000b7d 100644 --- a/README.md +++ b/README.md @@ -83,8 +83,8 @@ The documentation [installation notes](https://databrickslabs.github.io/dbldatag contains details of installation using alternative mechanisms. ## Compatibility -The Databricks Labs Data Generator framework can be used with Pyspark 3.1.2 and Python 3.8 or later. These are -compatible with the Databricks runtime 10.4 LTS and later releases. For full Unity Catalog support, +The Databricks Labs Data Generator framework can be used with Pyspark 3.3.0 and Python 3.9.21 or later. These are +compatible with the Databricks runtime 11.3 LTS and later releases. For full Unity Catalog support, we recommend using Databricks runtime 13.2 or later (Databricks 13.3 LTS or above preferred) For full library compatibility for a specific Databricks Spark release, see the Databricks @@ -155,7 +155,7 @@ The GitHub repository also contains further examples in the examples directory. ## Spark and Databricks Runtime Compatibility The `dbldatagen` package is intended to be compatible with recent LTS versions of the Databricks runtime, including -older LTS versions at least from 10.4 LTS and later. It also aims to be compatible with Delta Live Table runtimes, +older LTS versions at least from 11.3 LTS and later. It also aims to be compatible with Delta Live Table runtimes, including `current` and `preview`. 
While we don't specifically drop support for older runtimes, changes in Pyspark APIs or diff --git a/makefile b/makefile index e76e0952..6e597efc 100644 --- a/makefile +++ b/makefile @@ -27,11 +27,11 @@ prepare: clean create-dev-env: @echo "$(OK_COLOR)=> making conda dev environment$(NO_COLOR)" - conda create -n $(ENV_NAME) python=3.8.10 + conda create -n $(ENV_NAME) python=3.9.21 create-github-build-env: @echo "$(OK_COLOR)=> making conda dev environment$(NO_COLOR)" - conda create -n pip_$(ENV_NAME) python=3.8 + conda create -n pip_$(ENV_NAME) python=3.9.21 install-dev-dependencies: @echo "$(OK_COLOR)=> installing dev environment requirements$(NO_COLOR)" diff --git a/python/dev_require.txt b/python/dev_require.txt index cf35daeb..3eabad8f 100644 --- a/python/dev_require.txt +++ b/python/dev_require.txt @@ -1,19 +1,19 @@ # The following packages are used in building the test data generator framework. # All packages used are already installed in the Databricks runtime environment for version 6.5 or later numpy==1.22.0 -pandas==1.2.4 +pandas==1.3.4 pickleshare==0.7.5 py4j>=0.10.9.3 -pyarrow==4.0.1 -pyspark>=3.2.1,<=3.3.0 -python-dateutil==2.8.1 -six==1.15.0 -pyparsing==2.4.7 +pyarrow==7.0.0 +pyspark==3.3.0 +python-dateutil==2.8.2 +six==1.16.0 +pyparsing==3.0.4 jmespath==0.10.0 # The following packages are required for development only -wheel==0.36.2 -setuptools==52.0.0 +wheel==0.37.0 +setuptools==58.0.4 bumpversion pytest pytest-cov @@ -28,7 +28,7 @@ sphinx_rtd_theme nbsphinx numpydoc==0.8 pypandoc -ipython==7.22.0 +ipython==7.32.0 recommonmark sphinx-markdown-builder Jinja2 < 3.1 diff --git a/python/require.txt b/python/require.txt index de5c0250..bad13fa2 100644 --- a/python/require.txt +++ b/python/require.txt @@ -1,19 +1,19 @@ # The following packages are used in building the test data generator framework. 
# All packages used are already installed in the Databricks runtime environment for version 6.5 or later numpy==1.22.0 -pandas==1.2.5 +pandas==1.3.4 pickleshare==0.7.5 py4j==0.10.9 -pyarrow==4.0.1 -pyspark>=3.2.1 -python-dateutil==2.8.1 -six==1.15.0 -pyparsing==2.4.7 +pyarrow==7.0.0 +pyspark==3.3.0 +python-dateutil==2.8.2 +six==1.16.0 +pyparsing==3.0.4 jmespath==0.10.0 # The following packages are required for development only -wheel==0.36.2 -setuptools==52.0.0 +wheel==0.37.0 +setuptools==58.0.4 bumpversion pytest pytest-cov @@ -27,7 +27,7 @@ sphinx_rtd_theme nbsphinx numpydoc==0.8 pypandoc -ipython==7.22.0 +ipython==7.32.0 recommonmark sphinx-markdown-builder Jinja2 < 3.1 From 1547cb0ec2ce665b1152de376536bd486de5db34 Mon Sep 17 00:00:00 2001 From: ronanstokes-db Date: Wed, 5 Mar 2025 18:31:46 -0800 Subject: [PATCH 2/4] modified files to build for Databricks runtime 11.3 LTS compliant versions --- Pipfile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Pipfile b/Pipfile index 77f72b01..91a2fc8f 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ sphinx = ">=2.0.0,<3.1.0" nbsphinx = "*" numpydoc = "==0.8" pypandoc = "*" -ipython = "==7.31.1" +ipython = "==7.32.0" pydata-sphinx-theme = "*" recommonmark = "*" sphinx-markdown-builder = "*" @@ -19,13 +19,13 @@ prospector = "*" [packages] numpy = "==1.22.0" -pyspark = "==3.1.3" -pyarrow = "==4.0.1" -wheel = "==0.38.4" -pandas = "==1.2.4" -setuptools = "==65.6.3" -pyparsing = "==2.4.7" +pyspark = "==3.3.0" +pyarrow = "==7.0.0" +wheel = "==0.37.0" +pandas = "==1.3.4" +setuptools = "==68.0.4" +pyparsing = "==3.0.4" jmespath = "==0.10.0" [requires] -python_version = "3.8.12" +python_version = "3.9.21" From 7f40a798dd7cfe5abffc53ddf41b5f0d2fc09b73 Mon Sep 17 00:00:00 2001 From: ronanstokes-db Date: Wed, 5 Mar 2025 18:37:23 -0800 Subject: [PATCH 3/4] modified files to build for Databricks runtime 11.3 LTS compliant versions --- Pipfile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Pipfile b/Pipfile index 91a2fc8f..6ab8fcaa 100644 --- a/Pipfile +++ b/Pipfile @@ -23,7 +23,7 @@ pyspark = "==3.3.0" pyarrow = "==7.0.0" wheel = "==0.37.0" pandas = "==1.3.4" -setuptools = "==68.0.4" +setuptools = "==58.0.4" pyparsing = "==3.0.4" jmespath = "==0.10.0" diff --git a/setup.py b/setup.py index 8fb35dac..5baa5541 100644 --- a/setup.py +++ b/setup.py @@ -55,5 +55,5 @@ "Intended Audience :: Developers", "Intended Audience :: System Administrators" ], - python_requires='>=3.8.10', + python_requires='>=3.9.21', ) From 04d6f58146deedbc8ca149ee6d9539cb31096c26 Mon Sep 17 00:00:00 2001 From: ronanstokes-db Date: Wed, 5 Mar 2025 20:13:45 -0800 Subject: [PATCH 4/4] modified files to build for Databricks runtime 11.3 LTS compliant versions --- dbldatagen/column_generation_spec.py | 18 +++++++++++------- dbldatagen/data_analyzer.py | 4 ++-- dbldatagen/data_generator.py | 10 +++++----- dbldatagen/text_generator_plugins.py | 4 ++-- dbldatagen/text_generators.py | 3 ++- tests/test_basic_test.py | 10 +++++++--- tests/test_complex_columns.py | 11 ++++++----- tests/test_constraints.py | 8 ++++++-- tests/test_text_generation.py | 4 +++- 9 files changed, 44 insertions(+), 28 deletions(-) diff --git a/dbldatagen/column_generation_spec.py b/dbldatagen/column_generation_spec.py index 713d8439..195db979 100644 --- a/dbldatagen/column_generation_spec.py +++ b/dbldatagen/column_generation_spec.py @@ -95,7 +95,7 @@ class ColumnGenerationSpec(SerializableToDict): # restrict spurious messages from java gateway 
logging.getLogger("py4j").setLevel(logging.WARNING) - def __init__(self, name, colType=None, minValue=0, maxValue=None, step=1, prefix='', random=False, + def __init__(self, name, colType=None, *, minValue=0, maxValue=None, step=1, prefix='', random=False, distribution=None, baseColumn=None, randomSeed=None, randomSeedMethod=None, implicit=False, omit=False, nullable=True, debug=False, verbose=False, seedColumnName=DEFAULT_SEED_COLUMN, @@ -529,18 +529,22 @@ def _setup_logger(self): else: self.logger.setLevel(logging.WARNING) - def _computeAdjustedRangeForColumn(self, colType, c_min, c_max, c_step, c_begin, c_end, c_interval, c_range, + def _computeAdjustedRangeForColumn(self, colType, c_min, c_max, c_step, *, c_begin, c_end, c_interval, c_range, c_unique): """Determine adjusted range for data column """ assert colType is not None, "`colType` must be non-None instance" if type(colType) is DateType or type(colType) is TimestampType: - return self._computeAdjustedDateTimeRangeForColumn(colType, c_begin, c_end, c_interval, c_range, c_unique) + return self._computeAdjustedDateTimeRangeForColumn(colType, c_begin, c_end, c_interval, + c_range=c_range, + c_unique=c_unique) else: - return self._computeAdjustedNumericRangeForColumn(colType, c_min, c_max, c_step, c_range, c_unique) + return self._computeAdjustedNumericRangeForColumn(colType, c_min, c_max, c_step, + c_range=c_range, + c_unique=c_unique) - def _computeAdjustedNumericRangeForColumn(self, colType, c_min, c_max, c_step, c_range, c_unique): + def _computeAdjustedNumericRangeForColumn(self, colType, c_min, c_max, c_step, *, c_range, c_unique): """Determine adjusted range for data column Rules: @@ -589,7 +593,7 @@ def _computeAdjustedNumericRangeForColumn(self, colType, c_min, c_max, c_step, c return result - def _computeAdjustedDateTimeRangeForColumn(self, colType, c_begin, c_end, c_interval, c_range, c_unique): + def _computeAdjustedDateTimeRangeForColumn(self, colType, c_begin, c_end, c_interval, *, c_range, c_unique): """Determine adjusted range for Date or Timestamp data column """ effective_begin, effective_end, effective_interval = None, None, None @@ -656,7 +660,7 @@ def _getUniformRandomSQLExpression(self, col_name): else: return "rand()" - def _getScaledIntSQLExpression(self, col_name, scale, base_columns, base_datatypes=None, compute_method=None, + def _getScaledIntSQLExpression(self, col_name, scale, base_columns, *, base_datatypes=None, compute_method=None, normalize=False): """ Get scaled numeric expression diff --git a/dbldatagen/data_analyzer.py b/dbldatagen/data_analyzer.py index 463c0554..7ffe7124 100644 --- a/dbldatagen/data_analyzer.py +++ b/dbldatagen/data_analyzer.py @@ -92,7 +92,7 @@ def _displayRow(self, row): return ", ".join(results) - def _addMeasureToSummary(self, measureName, summaryExpr="''", fieldExprs=None, dfData=None, rowLimit=1, + def _addMeasureToSummary(self, measureName, *, summaryExpr="''", fieldExprs=None, dfData=None, rowLimit=1, dfSummary=None): """ Add a measure to the summary dataframe @@ -340,7 +340,7 @@ def _generatorDefaultAttributesFromType(cls, sqlType, colName=None, dataSummary= return result @classmethod - def _scriptDataGeneratorCode(cls, schema, dataSummary=None, sourceDf=None, suppressOutput=False, name=None): + def _scriptDataGeneratorCode(cls, schema, *, dataSummary=None, sourceDf=None, suppressOutput=False, name=None): """ Generate outline data generator code from an existing dataframe diff --git a/dbldatagen/data_generator.py b/dbldatagen/data_generator.py index 3a9f805f..d7c0884b 
100644 --- a/dbldatagen/data_generator.py +++ b/dbldatagen/data_generator.py @@ -76,7 +76,7 @@ class DataGenerator(SerializableToDict): # logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.NOTSET) - def __init__(self, sparkSession=None, name=None, randomSeedMethod=None, + def __init__(self, sparkSession=None, name=None, *, randomSeedMethod=None, rows=1000000, startingId=0, randomSeed=None, partitions=None, verbose=False, batchSize=None, debug=False, seedColumnName=DEFAULT_SEED_COLUMN, random=False, @@ -782,7 +782,7 @@ def _checkColumnOrColumnList(self, columns, allowId=False): f" column `{columns}` must refer to defined column") return True - def withColumnSpec(self, colName, minValue=None, maxValue=None, step=1, prefix=None, + def withColumnSpec(self, colName, *, minValue=None, maxValue=None, step=1, prefix=None, random=None, distribution=None, implicit=False, dataRange=None, omit=False, baseColumn=None, **kwargs): """ add a column specification for an existing column @@ -842,7 +842,7 @@ def hasColumnSpec(self, colName): """ return colName in self._columnSpecsByName - def withColumn(self, colName, colType=StringType(), minValue=None, maxValue=None, step=1, + def withColumn(self, colName, colType=StringType(), *, minValue=None, maxValue=None, step=1, dataRange=None, prefix=None, random=None, distribution=None, baseColumn=None, nullable=True, omit=False, implicit=False, noWarn=False, @@ -1058,7 +1058,7 @@ def withStructColumn(self, colName, fields=None, asJson=False, **kwargs): return newDf - def _generateColumnDefinition(self, colName, colType=None, baseColumn=None, + def _generateColumnDefinition(self, colName, colType=None, baseColumn=None, *, implicit=False, omit=False, nullable=True, **kwargs): """ generate field definition and column spec @@ -1591,7 +1591,7 @@ def scriptTable(self, name=None, location=None, tableFormat="delta", asHtml=Fals return results - def scriptMerge(self, tgtName=None, srcName=None, updateExpr=None, delExpr=None, joinExpr=None, timeExpr=None, + def scriptMerge(self, tgtName=None, srcName=None, *, updateExpr=None, delExpr=None, joinExpr=None, timeExpr=None, insertExpr=None, useExplicitNames=True, updateColumns=None, updateColumnExprs=None, diff --git a/dbldatagen/text_generator_plugins.py b/dbldatagen/text_generator_plugins.py index 8b20657c..135d50eb 100644 --- a/dbldatagen/text_generator_plugins.py +++ b/dbldatagen/text_generator_plugins.py @@ -69,7 +69,7 @@ class _FnCallContext: def __init__(self, txtGen): self.textGenerator = txtGen - def __init__(self, fn, init=None, initPerBatch=False, name=None, rootProperty=None): + def __init__(self, fn, *, init=None, initPerBatch=False, name=None, rootProperty=None): super().__init__() assert fn is not None or callable(fn), "Function must be provided wiith signature fn(context, oldValue)" assert init is None or callable(init), "Init function must be a callable function or lambda if passed" @@ -284,7 +284,7 @@ class FakerTextFactory(PyfuncTextFactory): _defaultFakerTextFactory = None - def __init__(self, locale=None, providers=None, name="FakerText", lib=None, + def __init__(self, *, locale=None, providers=None, name="FakerText", lib=None, rootClass=None): super().__init__(name) diff --git a/dbldatagen/text_generators.py b/dbldatagen/text_generators.py index b34814f3..19b6ea66 100644 --- a/dbldatagen/text_generators.py +++ b/dbldatagen/text_generators.py @@ -429,7 +429,8 @@ def _prepareTemplateStrings(self, genTemplate, escapeSpecialMeaning=False): return num_placeholders, retval - def 
_applyTemplateStringsForTemplate(self, baseValue, genTemplate, placeholders, rnds, escapeSpecialMeaning=False): + def _applyTemplateStringsForTemplate(self, baseValue, genTemplate, placeholders, rnds, *, + escapeSpecialMeaning=False): """ Vectorized implementation of template driven text substitution Apply substitutions to placeholders using random numbers diff --git a/tests/test_basic_test.py b/tests/test_basic_test.py index 35d4f382..9b9ddf31 100644 --- a/tests/test_basic_test.py +++ b/tests/test_basic_test.py @@ -1,6 +1,6 @@ import logging -import pytest +import pytest from pyspark.sql import functions as F from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType @@ -146,7 +146,9 @@ def test_alt_seed_column(self, caplog): IntegerType(), {'uniqueValues': 5000, 'random': True}) ]) - def test_seed_column_nocollision(self, caseName, withIdOutput, idType, additionalOptions, caplog): + def test_seed_column_nocollision(self, caseName, withIdOutput, idType, additionalOptions, caplog): \ + # pylint: disable=too-many-positional-arguments + logging.info(f"case: {caseName}") # caplog fixture captures log content @@ -189,7 +191,9 @@ def test_seed_column_nocollision(self, caseName, withIdOutput, idType, additiona ("with no Id output _id float", False, FloatType(), "_id"), ("with no Id output _id int", False, IntegerType(), "_id"), ]) - def test_seed_column_expected_collision1(self, caseName, withIdOutput, idType, idName, caplog): + def test_seed_column_expected_collision1(self, caseName, withIdOutput, idType, idName, caplog): \ + # pylint: disable=too-many-positional-arguments + logging.info(f"case: {caseName}") # caplog fixture captures log content diff --git a/tests/test_complex_columns.py b/tests/test_complex_columns.py index 58cdf13f..8a1606a5 100644 --- a/tests/test_complex_columns.py +++ b/tests/test_complex_columns.py @@ -91,7 +91,7 @@ def test_unitialized_complex_fields2(self, complexFieldType, expectedType, inval invalid_data_count = df.where(invalidValueCondition).count() assert invalid_data_count == 0, "Not expecting invalid values" - @pytest.mark.parametrize("complexFieldType, expectedType, valueInitializer, validValueCondition", + @pytest.mark.parametrize("complexFieldType, expectedType, valueInit, validCond", [("array", ArrayType(IntegerType()), "array(1,2,3)", "complex_field[1] = 2"), ("array>", ArrayType(ArrayType(StringType())), "array(array('one','two'))", @@ -111,8 +111,9 @@ def test_unitialized_complex_fields2(self, complexFieldType, expectedType, inval "complex_field is not Null and complex_field.c = code2" ) ]) - def test_initialized_complex_fields(self, complexFieldType, expectedType, valueInitializer, validValueCondition, - setupLogging): + def test_initialized_complex_fields(self, complexFieldType, expectedType, valueInit, validCond, setupLogging): \ + # pylint: disable=too-many-positional-arguments + data_rows = 1000 df_spec = (dg.DataGenerator(spark, name="test_data_set1", rows=data_rows, partitions=spark.sparkContext.defaultParallelism) @@ -122,7 +123,7 @@ def test_initialized_complex_fields(self, complexFieldType, expectedType, valueI .withColumn("code3", StringType(), values=['a', 'b', 'c']) .withColumn("code4", StringType(), values=['a', 'b', 'c'], random=True) .withColumn("code5", StringType(), values=['a', 'b', 'c'], random=True, weights=[9, 1, 1]) - .withColumn("complex_field", complexFieldType, expr=valueInitializer, + .withColumn("complex_field", complexFieldType, expr=valueInit, baseColumn=['code1', 'code2', 'code3', 'code4', 
'code5']) ) @@ -132,7 +133,7 @@ def test_initialized_complex_fields(self, complexFieldType, expectedType, valueI complex_type = df.schema["complex_field"].dataType assert complex_type == expectedType - valid_data_count = df.where(validValueCondition).count() + valid_data_count = df.where(validCond).count() assert valid_data_count == data_rows, "Not expecting invalid values" def test_basic_arrays_with_columns(self, setupLogging): diff --git a/tests/test_constraints.py b/tests/test_constraints.py index 93f61774..6928c68b 100644 --- a/tests/test_constraints.py +++ b/tests/test_constraints.py @@ -109,7 +109,9 @@ def test_constraint_filter_expression_cache(self): ("id", "==", 50, 1), ("id", "!=", 50, 98), ]) - def test_scalar_relation(self, generationSpec1, column, operation, literalValue, expectedRows): + def test_scalar_relation(self, column, operation, literalValue, expectedRows, generationSpec1): \ + # pylint: disable=too-many-positional-arguments + testDataSpec = (generationSpec1 .withConstraints([SqlExpr("id < 100"), SqlExpr("id > 0")]) @@ -275,7 +277,9 @@ def test_unique_combinations2(self, generationSpec3): ("id", 10, 20, True, 9), ("id", 10, 20, False, 11), ]) - def test_literal_range(self, generationSpec2, column, minValue, maxValue, strictFlag, expectedRows): + def test_literal_range(self, column, minValue, maxValue, strictFlag, expectedRows, generationSpec2): \ + # pylint: disable=too-many-positional-arguments + testDataSpec = (generationSpec2 .withConstraints([SqlExpr("id < 100"), SqlExpr("id > 0")]) diff --git a/tests/test_text_generation.py b/tests/test_text_generation.py index fb23d9d3..5deb137b 100644 --- a/tests/test_text_generation.py +++ b/tests/test_text_generation.py @@ -69,7 +69,9 @@ def test_text_generator_basics(self): (r'\n.\n.\n.\n', False, 15, None, True), (r'\n.\n.\n.\n', False, 15, -1, True), ]) - def test_random_number_generator(self, template, escapeSpecial, low, high, useSystemLib): + def test_random_number_generator(self, template, escapeSpecial, low, high, useSystemLib): \ + # pylint: disable=too-many-positional-arguments + """ As the test coverage tools dont detect code only used in UDFs, lets add some explicit tests for the underlying code""" test_template = TemplateGenerator(template, escapeSpecialChars=escapeSpecial)
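
As a quick illustration of the keyword-only signatures introduced in the fourth patch (the bare `*` added to `DataGenerator.__init__`, `withColumn`, `withColumnSpec`, and related helpers), the minimal sketch below shows the resulting call style. It assumes a local Spark 3.3.0 session and a `dbldatagen` build from this branch; the generator and column names are illustrative only, not taken from the patch.

```python
# Illustrative sketch only: assumes pyspark 3.3.0 and a dbldatagen build from this
# branch are installed locally. Names below are hypothetical.
from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType

import dbldatagen as dg

spark = SparkSession.builder.master("local[2]").appName("dbldatagen-kwonly").getOrCreate()

# colName and colType remain positional; range and randomness options are now keyword-only.
spec = (dg.DataGenerator(spark, name="kwonly_example", rows=1000, partitions=2)
        .withColumn("code1", IntegerType(), minValue=100, maxValue=200)
        .withColumn("code2", IntegerType(), minValue=0, maxValue=10, random=True))

df = spec.build()
df.show(5)
```

With the `*` marker in place, a positional call such as `withColumn("code1", IntegerType(), 100, 200)` would raise a `TypeError`, since only `colName` and `colType` remain positional in the patched signature.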