diff --git a/pysolr.py b/pysolr.py
index 6b612791..39cfb780 100644
--- a/pysolr.py
+++ b/pysolr.py
@@ -916,8 +916,70 @@ def suggest_terms(self, fields, prefix, handler="terms", **kwargs):
         )
         return res
 
-    def _build_json_doc(self, doc):
-        cleaned_doc = {k: v for k, v in doc.items() if not self._is_null_value(v)}
+    def _build_docs(self, docs, boost=None, fieldUpdates=None, commitWithin=None):
+        """
+        Serialize ``docs`` into the body of a Solr ``add`` request.
+
+        If no boost is needed the JSON multi-document API is used: it skips
+        the XML conversion and speeds up loading 15 to 20 times, with
+        drastically lower CPU usage. Boosts require the XML API.
+
+        Returns a ``(solrapi, body, doc_count)`` tuple, where ``solrapi`` is
+        ``"JSON"`` or ``"XML"``. The JSON body is UTF-8 encoded; the XML
+        body is converted back to unicode.
+
+        Raises ``ValueError`` if the JSON API is selected and ``docs`` is
+        neither a dict nor a list.
+        """
+        if boost is None:
+            solrapi = "JSON"
+            # a single doc is sent as a one-element array of docs
+            message = [docs] if isinstance(docs, dict) else docs
+            if not isinstance(message, list):
+                raise ValueError("wrong message type")
+
+            # NOTE(review): commitWithin is not written into the JSON body
+            # here; confirm the caller forwards it to Solr another way.
+            cleaned_message = [
+                self._build_json_doc(doc, fieldUpdates=fieldUpdates)
+                for doc in message
+            ]
+            m = self.encoder.encode(cleaned_message).encode("utf-8")
+        else:
+            solrapi = "XML"
+            message = ElementTree.Element("add")
+
+            if commitWithin:
+                message.set("commitWithin", commitWithin)
+
+            for doc in docs:
+                el = self._build_xml_doc(doc, boost=boost, fieldUpdates=fieldUpdates)
+                message.append(el)
+
+            # This returns a bytestring. Ugh.
+            m = ElementTree.tostring(message, encoding="utf-8")
+            # Convert back to Unicode please.
+            m = force_unicode(m)
+
+        return (solrapi, m, len(message))
+
+    def _build_json_doc(self, doc, fieldUpdates=None):
+        """
+        Build the JSON-serializable form of a single document.
+
+        Fields named in ``fieldUpdates`` are wrapped as ``{modifier: value}``
+        and always kept: a null value under a modifier is meaningful to Solr
+        (``set`` with null removes the field). All other fields, such as the
+        id, are added without a modifier and skipped when null.
+        """
+        modifiers = fieldUpdates or {}
+        cleaned_doc = {}
+        for key, value in doc.items():
+            if key in modifiers:
+                cleaned_doc[key] = {modifiers[key]: value}
+            elif not self._is_null_value(value):
+                cleaned_doc[key] = value
+
         return cleaned_doc
 
     def _build_xml_doc(self, doc, boost=None, fieldUpdates=None):
@@ -1025,43 +1087,13 @@ def add(
         """
         start_time = time.time()
         self.log.debug("Starting to build add request...")
-        solrapi = "XML"
-        # if no commands (no boost, no atomic updates) needed use json multidocument api
-        # The JSON API skips the XML conversion and speedup load from 15 to 20 times.
-        # CPU Usage is drastically lower.
-        if boost is None and fieldUpdates is None:
-            solrapi = "JSON"
-            message = docs
-            # single doc convert to array of docs
-            if isinstance(message, dict):
-                # convert dict to list
-                message = [message]
-            # json array of docs
-            if isinstance(message, list):
-                # convert to string
-                cleaned_message = [self._build_json_doc(doc) for doc in message]
-                m = self.encoder.encode(cleaned_message).encode("utf-8")
-            else:
-                raise ValueError("wrong message type")
-        else:
-            message = ElementTree.Element("add")
-
-            if commitWithin:
-                message.set("commitWithin", commitWithin)
-
-            for doc in docs:
-                el = self._build_xml_doc(doc, boost=boost, fieldUpdates=fieldUpdates)
-                message.append(el)
-
-            # This returns a bytestring. Ugh.
-            m = ElementTree.tostring(message, encoding="utf-8")
-            # Convert back to Unicode please.
-            m = force_unicode(m)
-
+        solrapi, m, len_message = self._build_docs(
+            docs, boost, fieldUpdates, commitWithin
+        )
         end_time = time.time()
         self.log.debug(
             "Built add request of %s docs in %0.2f seconds.",
-            len(message),
+            len_message,
             end_time - start_time,
         )
         return self._update(
diff --git a/tests/test_client.py b/tests/test_client.py
index b0ef2028..db59d265 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -772,6 +772,48 @@ def test_build_json_doc_matches_xml(self):
         self.assertNotIn("title", doc_json)
         self.assertIsNone(doc_xml.find("*[name='title']"))
 
+    def test_build_json_doc_field_updates(self):
+        doc = {"id": "doc_1", "title": "", "price": None, "popularity": 10}
+        doc_json = self.solr._build_json_doc(
+            doc, fieldUpdates={"price": "set", "popularity": "inc"}
+        )
+        # plain null fields are skipped, modifier fields are always kept
+        self.assertEqual(
+            doc_json,
+            {"id": "doc_1", "price": {"set": None}, "popularity": {"inc": 10}},
+        )
+
+    def test__build_docs_plain(self):
+        docs = [{"id": "doc_1", "title": "", "price": 12.59, "popularity": 10}]
+        solrapi, m, len_message = self.solr._build_docs(docs)
+        self.assertEqual(solrapi, "JSON")
+        self.assertEqual(len_message, 1)
+        self.assertNotIn(b'"title"', m)
+
+    def test__build_docs_single_dict(self):
+        solrapi, m, len_message = self.solr._build_docs({"id": "doc_1"})
+        self.assertEqual(solrapi, "JSON")
+        self.assertEqual(len_message, 1)
+
+    def test__build_docs_wrong_type(self):
+        with self.assertRaises(ValueError):
+            self.solr._build_docs("doc_1")
+
+    def test__build_docs_boost(self):
+        docs = [{"id": "doc_1", "title": "", "price": 12.59, "popularity": 10}]
+        solrapi, m, len_message = self.solr._build_docs(docs, boost={"title": 10.0})
+        self.assertEqual(solrapi, "XML")
+        self.assertEqual(len_message, 1)
+
+    def test__build_docs_field_updates(self):
+        docs = [{"id": "doc_1", "popularity": 10}]
+        solrapi, m, len_message = self.solr._build_docs(
+            docs, fieldUpdates={"popularity": "inc"}
+        )
+        self.assertEqual(solrapi, "JSON")
+        self.assertEqual(len_message, 1)
+        self.assertIn(b'"inc"', m)
+
     def test_add(self):
         self.assertEqual(len(self.solr.search("doc")), 3)
         self.assertEqual(len(self.solr.search("example")), 2)