Skip to content

Commit 040fe23

Browse files
authored
Merge pull request #303 from mpsonntag/idcheck
Add validation for unique IDs within a document
2 parents 7ee5066 + 4674ecf commit 040fe23

File tree

4 files changed

+137
-37
lines changed

4 files changed

+137
-37
lines changed

docs/data_model.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ interoperability, and being customizable. The model defines four
1313
entities (Property, Section, Value, RootSection) whose relations and
1414
elements are shown in the figure below.
1515

16-
![odml_logo](./images/erModel.png "odml data model")
16+
![odml_logo](images/erModel.png "odml data model")
1717

1818
Property and Section are the core entities. A Section contains
1919
Properties and can further have subsections, thus building a tree-like

docs/images/erModel.png

56.2 KB
Loading

docs/index.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ pip install odml
5050
- We have assembled a set of
5151
[tutorials](http://github.com/G-Node/python-odml/blob/master/doc/tutorial.rst "Python Tutorial").
5252

53+
### Python convenience scripts
54+
55+
The Python installation features two convenience command-line scripts.
56+
57+
- `odmlconversion`: Converts odML files of previous file versions into the current one.
58+
- `odmltordf`: Converts odML files to the supported RDF version of odML.
59+
60+
Both scripts print a detailed usage description when the help flag is added to the command.
61+
62+
odmlconversion -h
63+
odmltordf -h
64+
5365
## Support
5466

5567
If you experience problems using *odml* feel free to join our IRC channel

odml/validation.py

Lines changed: 124 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,40 @@
33
Generic odML validation framework
44
"""
55

6+
LABEL_ERROR = 'error'
7+
LABEL_WARNING = 'warning'
8+
69

710
class ValidationError(object):
    """
    A single finding produced by the validation process.

    Every finding is bound to an odML object (*obj*) or a list of
    those, and carries a message (*msg*) plus a rank (*rank*), which
    is one of 'error' or 'warning'.
    """

    def __init__(self, obj, msg, rank=LABEL_ERROR):
        self.obj = obj
        self.msg = msg
        self.rank = rank

    @property
    def is_warning(self):
        """Whether the rank of this finding is the warning label."""
        return self.rank == LABEL_WARNING

    @property
    def is_error(self):
        """Whether the rank of this finding is the error label."""
        return self.rank == LABEL_ERROR

    @property
    def path(self):
        """Path of the bound object as reported by its get_path()."""
        # NOTE(review): the class docstring allows *obj* to be a list,
        # which has no get_path() -- confirm how callers handle that.
        bound = self.obj
        return bound.get_path()

    def __repr__(self):
        details = (self.rank, self.obj, self.msg)
        return "<ValidationError(%s):%s '%s'>" % details
3740

3841

3942
class Validation(object):
@@ -65,7 +68,7 @@ def __init__(self, doc):
6568
self.doc = doc # may also be a section
6669
self.errors = []
6770
self.validate(doc)
68-
# TODO isn't there a 'walk' method for these things?
71+
6972
for sec in doc.itersections(recursive=True):
7073
self.validate(sec)
7174
for prop in sec.properties:
@@ -98,34 +101,115 @@ def __getitem__(self, obj):
98101
def section_type_must_be_defined(sec):
    """
    Yield a warning if the type of a Section is not defined.

    A type counts as not defined when it is None, the empty
    string or the literal string 'undefined'.

    :param sec: odML Section
    """
    sec_type = sec.type
    if sec_type is None or sec_type in ('', 'undefined'):
        yield ValidationError(sec, 'Section type undefined', LABEL_WARNING)
105+
102106

103107
Validation.register_handler('section', section_type_must_be_defined)
104108

105109

106-
def section_repository_present(sec):
    """
    Yield a single warning if the terminology of a Section
    cannot be resolved. The checks run in this order:

    1. the Section has no repository,
    2. loading the terminology equivalent raises an exception,
    3. the Section type is not present in the terminology.

    Nothing is yielded when a terminology equivalent is found.

    :param sec: odML Section
    """
    if sec.get_repository() is None:
        msg = 'A section should have an associated repository'
    else:
        try:
            equivalent = sec.get_terminology_equivalent()
        except Exception as exc:
            # Any failure while loading is reported as a warning
            # instead of aborting the validation run.
            msg = 'Could not load terminology: %s' % exc
        else:
            if equivalent is not None:
                return
            msg = "Section type '%s' not found in terminology" % sec.type

    yield ValidationError(sec, msg, LABEL_WARNING)
134+
135+
136+
Validation.register_handler('section', section_repository_present)
137+
138+
139+
def document_unique_ids(doc):
    """
    Walk an odML Document and verify that every assigned
    id occurs only once within the document.

    The id of the Document itself is registered first, the
    Sections below it are then checked via section_unique_ids.

    Yields every duplicate id entry reported by that check.

    :param doc: odML document
    """
    doc_id = doc.id
    doc_label = "Document '%s'" % doc.get_path()
    known_ids = {doc_id: doc_label}

    for duplicate in section_unique_ids(doc, known_ids):
        yield duplicate
152+
153+
154+
def section_unique_ids(parent, id_map=None):
    """
    Traverse a parent (odML Document or Section)
    and check whether all assigned ids are unique.

    An "id":"odML object / path" dictionary of already known
    ids may be handed in via the *id_map* attribute. The ids
    of all Sections encountered for the first time are added
    to this dictionary, and the same dictionary is passed on
    to the Property check and to the recursive calls.

    Yields a ValidationError for every duplicate odML object
    id entry that is encountered.

    :param parent: odML Document or Section
    :param id_map: "id":"odML object / path" dictionary
    """
    # Create a fresh map only when none was handed in. A truthiness
    # test would also replace an empty dictionary supplied by the
    # caller, so the caller's map would never be filled.
    if id_map is None:
        id_map = {}

    for sec in parent.sections:
        # The Properties of a Section are checked before the id of
        # the Section itself is registered.
        for i in property_unique_ids(sec, id_map):
            yield i

        if sec.id in id_map:
            yield ValidationError(sec, "Duplicate id in Section '%s' and '%s'" %
                                  (sec.get_path(), id_map[sec.id]))
        else:
            id_map[sec.id] = "Section '%s'" % sec.get_path()

        # Descend into the subsections with the shared map.
        for i in section_unique_ids(sec, id_map):
            yield i
184+
185+
186+
def property_unique_ids(section, id_map=None):
    """
    Check whether all ids assigned to the odML
    Properties of an odML Section are unique.

    An "id":"odML object / path" dictionary of already known
    ids may be handed in via the *id_map* attribute. The ids
    of all Properties encountered for the first time are added
    to this dictionary, so one dictionary can be shared across
    several calls.

    Yields a ValidationError for every Property whose id is
    already contained in *id_map*.

    :param section: odML Section
    :param id_map: "id":"odML object / path" dictionary
    """
    # Create a fresh map only when none was handed in. A truthiness
    # test would also replace an empty dictionary supplied by the
    # caller, and the ids collected here would be silently lost.
    if id_map is None:
        id_map = {}

    for prop in section.properties:
        if prop.id in id_map:
            yield ValidationError(prop, "Duplicate id in Property '%s' and '%s'" %
                                  (prop.get_path(), id_map[prop.id]))
        else:
            id_map[prop.id] = "Property '%s'" % prop.get_path()
210+
211+
212+
Validation.register_handler('odML', document_unique_ids)
129213

130214

131215
def object_unique_names(obj, children, attr=lambda x: x.name,
@@ -143,13 +227,13 @@ def object_unique_names(obj, children, attr=lambda x: x.name,
143227
if len(names) == len(children(obj)):
144228
return # quick exit
145229
names = set()
146-
for s in children(obj):
147-
if attr(s) in names:
148-
yield ValidationError(s, msg, 'error')
149-
names.add(attr(s))
230+
for i in children(obj):
231+
if attr(i) in names:
232+
yield ValidationError(i, msg, LABEL_ERROR)
233+
names.add(attr(i))
150234

151235

152-
def section_unique_name_type_combination(obj):
236+
def section_unique_name_type(obj):
153237
for i in object_unique_names(
154238
obj,
155239
attr=lambda x: (x.name, x.type),
@@ -162,8 +246,9 @@ def property_unique_names(obj):
162246
for i in object_unique_names(obj, lambda x: x.properties):
163247
yield i
164248

165-
Validation.register_handler('odML', section_unique_name_type_combination)
166-
Validation.register_handler('section', section_unique_name_type_combination)
249+
250+
Validation.register_handler('odML', section_unique_name_type)
251+
Validation.register_handler('section', section_unique_name_type)
167252
Validation.register_handler('section', property_unique_names)
168253

169254

@@ -179,11 +264,12 @@ def property_terminology_check(prop):
179264
if tsec is None:
180265
return
181266
try:
182-
tprop = tsec.properties[prop.name]
267+
tsec.properties[prop.name]
183268
except KeyError:
184-
tprop = None
185-
yield ValidationError(prop, "Property '%s' not found in terminology" %
186-
prop.name, 'warning')
269+
yield ValidationError(prop,
270+
"Property '%s' not found in terminology" % prop.name,
271+
LABEL_WARNING)
272+
187273

188274
Validation.register_handler('property', property_terminology_check)
189275

@@ -200,12 +286,14 @@ def property_dependency_check(prop):
200286
try:
201287
dep_obj = prop.parent[dep]
202288
except KeyError:
203-
yield ValidationError(prop, "Property refers to a non-existent "
204-
"dependency object", 'warning')
289+
yield ValidationError(prop,
290+
"Property refers to a non-existent dependency object",
291+
LABEL_WARNING)
205292
return
206293

207-
if prop.dependency_value not in dep_obj.value[0]: # FIXME
294+
if prop.dependency_value not in dep_obj.value[0]:
208295
yield ValidationError(prop, "Dependency-value is not equal to value of"
209-
" the property's dependency", 'warning')
296+
" the property's dependency", LABEL_WARNING)
297+
210298

211299
Validation.register_handler('property', property_dependency_check)

0 commit comments

Comments
 (0)