Skip to content

Commit 04d6f58

Browse files
Modified files to build for Databricks Runtime 11.3 LTS-compliant versions
1 parent 7f40a79 commit 04d6f58

9 files changed

+44
-28
lines changed

dbldatagen/column_generation_spec.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class ColumnGenerationSpec(SerializableToDict):
9595
# restrict spurious messages from java gateway
9696
logging.getLogger("py4j").setLevel(logging.WARNING)
9797

98-
def __init__(self, name, colType=None, minValue=0, maxValue=None, step=1, prefix='', random=False,
98+
def __init__(self, name, colType=None, *, minValue=0, maxValue=None, step=1, prefix='', random=False,
9999
distribution=None, baseColumn=None, randomSeed=None, randomSeedMethod=None,
100100
implicit=False, omit=False, nullable=True, debug=False, verbose=False,
101101
seedColumnName=DEFAULT_SEED_COLUMN,
@@ -529,18 +529,22 @@ def _setup_logger(self):
529529
else:
530530
self.logger.setLevel(logging.WARNING)
531531

532-
def _computeAdjustedRangeForColumn(self, colType, c_min, c_max, c_step, c_begin, c_end, c_interval, c_range,
532+
def _computeAdjustedRangeForColumn(self, colType, c_min, c_max, c_step, *, c_begin, c_end, c_interval, c_range,
533533
c_unique):
534534
"""Determine adjusted range for data column
535535
"""
536536
assert colType is not None, "`colType` must be non-None instance"
537537

538538
if type(colType) is DateType or type(colType) is TimestampType:
539-
return self._computeAdjustedDateTimeRangeForColumn(colType, c_begin, c_end, c_interval, c_range, c_unique)
539+
return self._computeAdjustedDateTimeRangeForColumn(colType, c_begin, c_end, c_interval,
540+
c_range=c_range,
541+
c_unique=c_unique)
540542
else:
541-
return self._computeAdjustedNumericRangeForColumn(colType, c_min, c_max, c_step, c_range, c_unique)
543+
return self._computeAdjustedNumericRangeForColumn(colType, c_min, c_max, c_step,
544+
c_range=c_range,
545+
c_unique=c_unique)
542546

543-
def _computeAdjustedNumericRangeForColumn(self, colType, c_min, c_max, c_step, c_range, c_unique):
547+
def _computeAdjustedNumericRangeForColumn(self, colType, c_min, c_max, c_step, *, c_range, c_unique):
544548
"""Determine adjusted range for data column
545549
546550
Rules:
@@ -589,7 +593,7 @@ def _computeAdjustedNumericRangeForColumn(self, colType, c_min, c_max, c_step, c
589593

590594
return result
591595

592-
def _computeAdjustedDateTimeRangeForColumn(self, colType, c_begin, c_end, c_interval, c_range, c_unique):
596+
def _computeAdjustedDateTimeRangeForColumn(self, colType, c_begin, c_end, c_interval, *, c_range, c_unique):
593597
"""Determine adjusted range for Date or Timestamp data column
594598
"""
595599
effective_begin, effective_end, effective_interval = None, None, None
@@ -656,7 +660,7 @@ def _getUniformRandomSQLExpression(self, col_name):
656660
else:
657661
return "rand()"
658662

659-
def _getScaledIntSQLExpression(self, col_name, scale, base_columns, base_datatypes=None, compute_method=None,
663+
def _getScaledIntSQLExpression(self, col_name, scale, base_columns, *, base_datatypes=None, compute_method=None,
660664
normalize=False):
661665
""" Get scaled numeric expression
662666

dbldatagen/data_analyzer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def _displayRow(self, row):
9292

9393
return ", ".join(results)
9494

95-
def _addMeasureToSummary(self, measureName, summaryExpr="''", fieldExprs=None, dfData=None, rowLimit=1,
95+
def _addMeasureToSummary(self, measureName, *, summaryExpr="''", fieldExprs=None, dfData=None, rowLimit=1,
9696
dfSummary=None):
9797
""" Add a measure to the summary dataframe
9898
@@ -340,7 +340,7 @@ def _generatorDefaultAttributesFromType(cls, sqlType, colName=None, dataSummary=
340340
return result
341341

342342
@classmethod
343-
def _scriptDataGeneratorCode(cls, schema, dataSummary=None, sourceDf=None, suppressOutput=False, name=None):
343+
def _scriptDataGeneratorCode(cls, schema, *, dataSummary=None, sourceDf=None, suppressOutput=False, name=None):
344344
"""
345345
Generate outline data generator code from an existing dataframe
346346

dbldatagen/data_generator.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class DataGenerator(SerializableToDict):
7676

7777
# logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.NOTSET)
7878

79-
def __init__(self, sparkSession=None, name=None, randomSeedMethod=None,
79+
def __init__(self, sparkSession=None, name=None, *, randomSeedMethod=None,
8080
rows=1000000, startingId=0, randomSeed=None, partitions=None, verbose=False,
8181
batchSize=None, debug=False, seedColumnName=DEFAULT_SEED_COLUMN,
8282
random=False,
@@ -782,7 +782,7 @@ def _checkColumnOrColumnList(self, columns, allowId=False):
782782
f" column `{columns}` must refer to defined column")
783783
return True
784784

785-
def withColumnSpec(self, colName, minValue=None, maxValue=None, step=1, prefix=None,
785+
def withColumnSpec(self, colName, *, minValue=None, maxValue=None, step=1, prefix=None,
786786
random=None, distribution=None,
787787
implicit=False, dataRange=None, omit=False, baseColumn=None, **kwargs):
788788
""" add a column specification for an existing column
@@ -842,7 +842,7 @@ def hasColumnSpec(self, colName):
842842
"""
843843
return colName in self._columnSpecsByName
844844

845-
def withColumn(self, colName, colType=StringType(), minValue=None, maxValue=None, step=1,
845+
def withColumn(self, colName, colType=StringType(), *, minValue=None, maxValue=None, step=1,
846846
dataRange=None, prefix=None, random=None, distribution=None,
847847
baseColumn=None, nullable=True,
848848
omit=False, implicit=False, noWarn=False,
@@ -1058,7 +1058,7 @@ def withStructColumn(self, colName, fields=None, asJson=False, **kwargs):
10581058

10591059
return newDf
10601060

1061-
def _generateColumnDefinition(self, colName, colType=None, baseColumn=None,
1061+
def _generateColumnDefinition(self, colName, colType=None, baseColumn=None, *,
10621062
implicit=False, omit=False, nullable=True, **kwargs):
10631063
""" generate field definition and column spec
10641064
@@ -1591,7 +1591,7 @@ def scriptTable(self, name=None, location=None, tableFormat="delta", asHtml=Fals
15911591

15921592
return results
15931593

1594-
def scriptMerge(self, tgtName=None, srcName=None, updateExpr=None, delExpr=None, joinExpr=None, timeExpr=None,
1594+
def scriptMerge(self, tgtName=None, srcName=None, *, updateExpr=None, delExpr=None, joinExpr=None, timeExpr=None,
15951595
insertExpr=None,
15961596
useExplicitNames=True,
15971597
updateColumns=None, updateColumnExprs=None,

dbldatagen/text_generator_plugins.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class _FnCallContext:
6969
def __init__(self, txtGen):
7070
self.textGenerator = txtGen
7171

72-
def __init__(self, fn, init=None, initPerBatch=False, name=None, rootProperty=None):
72+
def __init__(self, fn, *, init=None, initPerBatch=False, name=None, rootProperty=None):
7373
super().__init__()
7474
assert fn is not None or callable(fn), "Function must be provided wiith signature fn(context, oldValue)"
7575
assert init is None or callable(init), "Init function must be a callable function or lambda if passed"
@@ -284,7 +284,7 @@ class FakerTextFactory(PyfuncTextFactory):
284284

285285
_defaultFakerTextFactory = None
286286

287-
def __init__(self, locale=None, providers=None, name="FakerText", lib=None,
287+
def __init__(self, *, locale=None, providers=None, name="FakerText", lib=None,
288288
rootClass=None):
289289

290290
super().__init__(name)

dbldatagen/text_generators.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,8 @@ def _prepareTemplateStrings(self, genTemplate, escapeSpecialMeaning=False):
429429

430430
return num_placeholders, retval
431431

432-
def _applyTemplateStringsForTemplate(self, baseValue, genTemplate, placeholders, rnds, escapeSpecialMeaning=False):
432+
def _applyTemplateStringsForTemplate(self, baseValue, genTemplate, placeholders, rnds, *,
433+
escapeSpecialMeaning=False):
433434
""" Vectorized implementation of template driven text substitution
434435
435436
Apply substitutions to placeholders using random numbers

tests/test_basic_test.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
2-
import pytest
32

3+
import pytest
44
from pyspark.sql import functions as F
55
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType
66

@@ -146,7 +146,9 @@ def test_alt_seed_column(self, caplog):
146146
IntegerType(),
147147
{'uniqueValues': 5000, 'random': True})
148148
])
149-
def test_seed_column_nocollision(self, caseName, withIdOutput, idType, additionalOptions, caplog):
149+
def test_seed_column_nocollision(self, caseName, withIdOutput, idType, additionalOptions, caplog): \
150+
# pylint: disable=too-many-positional-arguments
151+
150152
logging.info(f"case: {caseName}")
151153

152154
# caplog fixture captures log content
@@ -189,7 +191,9 @@ def test_seed_column_nocollision(self, caseName, withIdOutput, idType, additiona
189191
("with no Id output _id float", False, FloatType(), "_id"),
190192
("with no Id output _id int", False, IntegerType(), "_id"),
191193
])
192-
def test_seed_column_expected_collision1(self, caseName, withIdOutput, idType, idName, caplog):
194+
def test_seed_column_expected_collision1(self, caseName, withIdOutput, idType, idName, caplog): \
195+
# pylint: disable=too-many-positional-arguments
196+
193197
logging.info(f"case: {caseName}")
194198

195199
# caplog fixture captures log content

tests/test_complex_columns.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def test_unitialized_complex_fields2(self, complexFieldType, expectedType, inval
9191
invalid_data_count = df.where(invalidValueCondition).count()
9292
assert invalid_data_count == 0, "Not expecting invalid values"
9393

94-
@pytest.mark.parametrize("complexFieldType, expectedType, valueInitializer, validValueCondition",
94+
@pytest.mark.parametrize("complexFieldType, expectedType, valueInit, validCond",
9595
[("array<int>", ArrayType(IntegerType()), "array(1,2,3)",
9696
"complex_field[1] = 2"),
9797
("array<array<string>>", ArrayType(ArrayType(StringType())), "array(array('one','two'))",
@@ -111,8 +111,9 @@ def test_unitialized_complex_fields2(self, complexFieldType, expectedType, inval
111111
"complex_field is not Null and complex_field.c = code2"
112112
)
113113
])
114-
def test_initialized_complex_fields(self, complexFieldType, expectedType, valueInitializer, validValueCondition,
115-
setupLogging):
114+
def test_initialized_complex_fields(self, complexFieldType, expectedType, valueInit, validCond, setupLogging): \
115+
# pylint: disable=too-many-positional-arguments
116+
116117
data_rows = 1000
117118
df_spec = (dg.DataGenerator(spark, name="test_data_set1", rows=data_rows,
118119
partitions=spark.sparkContext.defaultParallelism)
@@ -122,7 +123,7 @@ def test_initialized_complex_fields(self, complexFieldType, expectedType, valueI
122123
.withColumn("code3", StringType(), values=['a', 'b', 'c'])
123124
.withColumn("code4", StringType(), values=['a', 'b', 'c'], random=True)
124125
.withColumn("code5", StringType(), values=['a', 'b', 'c'], random=True, weights=[9, 1, 1])
125-
.withColumn("complex_field", complexFieldType, expr=valueInitializer,
126+
.withColumn("complex_field", complexFieldType, expr=valueInit,
126127
baseColumn=['code1', 'code2', 'code3', 'code4', 'code5'])
127128
)
128129

@@ -132,7 +133,7 @@ def test_initialized_complex_fields(self, complexFieldType, expectedType, valueI
132133
complex_type = df.schema["complex_field"].dataType
133134
assert complex_type == expectedType
134135

135-
valid_data_count = df.where(validValueCondition).count()
136+
valid_data_count = df.where(validCond).count()
136137
assert valid_data_count == data_rows, "Not expecting invalid values"
137138

138139
def test_basic_arrays_with_columns(self, setupLogging):

tests/test_constraints.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,9 @@ def test_constraint_filter_expression_cache(self):
109109
("id", "==", 50, 1),
110110
("id", "!=", 50, 98),
111111
])
112-
def test_scalar_relation(self, generationSpec1, column, operation, literalValue, expectedRows):
112+
def test_scalar_relation(self, column, operation, literalValue, expectedRows, generationSpec1): \
113+
# pylint: disable=too-many-positional-arguments
114+
113115
testDataSpec = (generationSpec1
114116
.withConstraints([SqlExpr("id < 100"),
115117
SqlExpr("id > 0")])
@@ -275,7 +277,9 @@ def test_unique_combinations2(self, generationSpec3):
275277
("id", 10, 20, True, 9),
276278
("id", 10, 20, False, 11),
277279
])
278-
def test_literal_range(self, generationSpec2, column, minValue, maxValue, strictFlag, expectedRows):
280+
def test_literal_range(self, column, minValue, maxValue, strictFlag, expectedRows, generationSpec2): \
281+
# pylint: disable=too-many-positional-arguments
282+
279283
testDataSpec = (generationSpec2
280284
.withConstraints([SqlExpr("id < 100"),
281285
SqlExpr("id > 0")])

tests/test_text_generation.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@ def test_text_generator_basics(self):
6969
(r'\n.\n.\n.\n', False, 15, None, True),
7070
(r'\n.\n.\n.\n', False, 15, -1, True),
7171
])
72-
def test_random_number_generator(self, template, escapeSpecial, low, high, useSystemLib):
72+
def test_random_number_generator(self, template, escapeSpecial, low, high, useSystemLib): \
73+
# pylint: disable=too-many-positional-arguments
74+
7375
""" As the test coverage tools dont detect code only used in UDFs,
7476
lets add some explicit tests for the underlying code"""
7577
test_template = TemplateGenerator(template, escapeSpecialChars=escapeSpecial)

0 commit comments

Comments
 (0)