Skip to content

Commit cbc9b9e

Browse files
authored
Merge pull request #374 from MuckRock/json-field-updates
Perform adds with field updates using JSON
2 parents bc9b34b + 1806121 commit cbc9b9e

File tree

2 files changed

+84
-36
lines changed

2 files changed

+84
-36
lines changed

pysolr.py

Lines changed: 54 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -919,8 +919,56 @@ def suggest_terms(self, fields, prefix, handler="terms", **kwargs):
919919
)
920920
return res
921921

922-
def _build_json_doc(self, doc):
923-
cleaned_doc = {k: v for k, v in doc.items() if not self._is_null_value(v)}
922+
def _build_docs(self, docs, boost=None, fieldUpdates=None, commitWithin=None):
923+
# if no boost needed use json multidocument api
924+
# The JSON API skips the XML conversion and speeds up loading by 15 to 20 times.
925+
# CPU Usage is drastically lower.
926+
if boost is None:
927+
solrapi = "JSON"
928+
message = docs
929+
# single doc convert to array of docs
930+
if isinstance(message, dict):
931+
# convert dict to list
932+
message = [message]
933+
# json array of docs
934+
if isinstance(message, list):
935+
# convert to string
936+
cleaned_message = [
937+
self._build_json_doc(doc, fieldUpdates=fieldUpdates)
938+
for doc in message
939+
]
940+
m = self.encoder.encode(cleaned_message).encode("utf-8")
941+
else:
942+
raise ValueError("wrong message type")
943+
else:
944+
solrapi = "XML"
945+
message = ElementTree.Element("add")
946+
947+
if commitWithin:
948+
message.set("commitWithin", commitWithin)
949+
950+
for doc in docs:
951+
el = self._build_xml_doc(doc, boost=boost, fieldUpdates=fieldUpdates)
952+
message.append(el)
953+
954+
# This returns a bytestring. Ugh.
955+
m = ElementTree.tostring(message, encoding="utf-8")
956+
# Convert back to Unicode please.
957+
m = force_unicode(m)
958+
959+
return (solrapi, m, len(message))
960+
961+
def _build_json_doc(self, doc, fieldUpdates=None):
962+
if fieldUpdates is None:
963+
cleaned_doc = {k: v for k, v in doc.items() if not self._is_null_value(v)}
964+
else:
965+
# id must be added without a modifier
966+
# if using field updates, all other fields should have a modifier
967+
cleaned_doc = {
968+
k: {fieldUpdates[k]: v} if k in fieldUpdates else v
969+
for k, v in doc.items()
970+
}
971+
924972
return cleaned_doc
925973

926974
def _build_xml_doc(self, doc, boost=None, fieldUpdates=None):
@@ -1028,43 +1076,13 @@ def add(
10281076
"""
10291077
start_time = time.time()
10301078
self.log.debug("Starting to build add request...")
1031-
solrapi = "XML"
1032-
# if no commands (no boost, no atomic updates) needed use json multidocument api
1033-
# The JSON API skips the XML conversion and speeds up loading by 15 to 20 times.
1034-
# CPU Usage is drastically lower.
1035-
if boost is None and fieldUpdates is None:
1036-
solrapi = "JSON"
1037-
message = docs
1038-
# single doc convert to array of docs
1039-
if isinstance(message, dict):
1040-
# convert dict to list
1041-
message = [message]
1042-
# json array of docs
1043-
if isinstance(message, list):
1044-
# convert to string
1045-
cleaned_message = [self._build_json_doc(doc) for doc in message]
1046-
m = self.encoder.encode(cleaned_message).encode("utf-8")
1047-
else:
1048-
raise ValueError("wrong message type")
1049-
else:
1050-
message = ElementTree.Element("add")
1051-
1052-
if commitWithin:
1053-
message.set("commitWithin", commitWithin)
1054-
1055-
for doc in docs:
1056-
el = self._build_xml_doc(doc, boost=boost, fieldUpdates=fieldUpdates)
1057-
message.append(el)
1058-
1059-
# This returns a bytestring. Ugh.
1060-
m = ElementTree.tostring(message, encoding="utf-8")
1061-
# Convert back to Unicode please.
1062-
m = force_unicode(m)
1063-
1079+
solrapi, m, len_message = self._build_docs(
1080+
docs, boost, fieldUpdates, commitWithin
1081+
)
10641082
end_time = time.time()
10651083
self.log.debug(
10661084
"Built add request of %s docs in %0.2f seconds.",
1067-
len(message),
1085+
len_message,
10681086
end_time - start_time,
10691087
)
10701088
return self._update(

tests/test_client.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,36 @@ def test_build_json_doc_matches_xml(self):
767767
self.assertNotIn("title", doc_json)
768768
self.assertIsNone(doc_xml.find("*[name='title']"))
769769

770+
def test__build_docs_plain(self):
771+
docs = [{
772+
"id": "doc_1",
773+
"title": "",
774+
"price": 12.59,
775+
"popularity": 10
776+
}]
777+
solrapi, m, len_message = self.solr._build_docs(docs)
778+
self.assertEqual(solrapi, "JSON")
779+
780+
def test__build_docs_boost(self):
781+
docs = [{
782+
"id": "doc_1",
783+
"title": "",
784+
"price": 12.59,
785+
"popularity": 10
786+
}]
787+
solrapi, m, len_message = self.solr._build_docs(docs, boost={"title": 10.0})
788+
self.assertEqual(solrapi, "XML")
789+
790+
def test__build_docs_field_updates(self):
791+
docs = [{
792+
"id": "doc_1",
793+
"popularity": 10
794+
}]
795+
solrapi, m, len_message = self.solr._build_docs(
796+
docs, fieldUpdates={"popularity": "inc"}
797+
)
798+
self.assertEqual(solrapi, "JSON")
799+
770800
def test_add(self):
771801
self.assertEqual(len(self.solr.search("doc")), 3)
772802
self.assertEqual(len(self.solr.search("example")), 2)

0 commit comments

Comments
 (0)