GoogleCloudPlatform · andrewsg · Jun 15, 2018 · Jun 13, 2018 · Jun 15, 2018 · andrewsg
diff --git a/vision/cloud-client/detect/README.rst b/vision/cloud-client/detect/README.rst
@@ -81,7 +81,7 @@ To run this sample:
     $ python detect.py
 
     usage: detect.py [-h]
-                     {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri}
+                     {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri}
                      ...
 
     This application demonstrates how to perform basic operations with the
@@ -94,12 +94,13 @@ To run this sample:
     python detect.py web-uri http://wheresgus.com/dog.JPG
     python detect.py web-geo ./resources/city.jpg
     python detect.py faces-uri gs://your-bucket/file.jpg
+    python detect.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf     gs://BUCKET_NAME/PREFIX/
 
     For more information, the documentation at
     https://cloud.google.com/vision/docs.
 
     positional arguments:
-      {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri}
+      {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri}
         faces               Detects faces in an image.
         faces-uri           Detects faces in the file located in Google Cloud
                             Storage or the web.
@@ -135,6 +136,7 @@ To run this sample:
         document            Detects document features in an image.
         document-uri        Detects document features in the file located in
                             Google Cloud Storage.
+        ocr-uri             OCR with PDF/TIFF as source files on GCS
 
     optional arguments:
       -h, --help            show this help message and exit

diff --git a/vision/cloud-client/detect/detect.py b/vision/cloud-client/detect/detect.py
@@ -24,15 +24,20 @@
 python detect.py web-uri http://wheresgus.com/dog.JPG
 python detect.py web-geo ./resources/city.jpg
 python detect.py faces-uri gs://your-bucket/file.jpg
+python detect.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf \
+    gs://BUCKET_NAME/PREFIX/
 
 For more information, the documentation at
 https://cloud.google.com/vision/docs.
 """
 
 import argparse
 import io
+import re
 
+from google.cloud import storage
 from google.cloud import vision
+from google.protobuf import json_format
 
 
 # [START def_detect_faces]
@@ -636,6 +641,77 @@ def detect_document_uri(uri):
 # [END def_detect_document_uri]
 
 
+# [START vision_async_detect_document_ocr]
+def async_detect_document(gcs_source_uri, gcs_destination_uri):
+    """OCR with PDF/TIFF as source files on GCS"""
+    # The docstring above is reused verbatim as the `ocr-uri` help text.
+    # Supported mime_types are: 'application/pdf' and 'image/tiff'
+    mime_type = 'application/pdf'
+
+    # How many pages should be grouped into each json output file.
+    batch_size = 2
+
+    client = vision.ImageAnnotatorClient()
+
+    feature = vision.types.Feature(
+        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
+
+    gcs_source = vision.types.GcsSource(uri=gcs_source_uri)
+    input_config = vision.types.InputConfig(
+        gcs_source=gcs_source, mime_type=mime_type)
+
+    gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri)
+    output_config = vision.types.OutputConfig(
+        gcs_destination=gcs_destination, batch_size=batch_size)
+
+    async_request = vision.types.AsyncAnnotateFileRequest(
+        features=[feature], input_config=input_config,
+        output_config=output_config)
+
+    operation = client.async_batch_annotate_files(requests=[async_request])
+
+    print('Waiting for the operation to finish.')
+    operation.result(timeout=180)
+
+    # Once the request has completed and the output has been
+    # written to GCS, we can list all the output files.
+    storage_client = storage.Client()
+
+    match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
+    if match is None:
+        # Fail with a clear message instead of AttributeError on None.
+        raise ValueError(
+            'Expected gs://BUCKET/PREFIX, got: ' + gcs_destination_uri)
+    bucket_name, prefix = match.groups()
+
+    # Positional: newer google-cloud-storage releases renamed this parameter.
+    bucket = storage_client.get_bucket(bucket_name)
+
+    # List objects with the given prefix, skipping names that end in '/'
+    # (e.g. folder placeholder objects), which are not output files.
+    blob_list = [blob for blob in bucket.list_blobs(prefix=prefix)
+                 if not blob.name.endswith('/')]
+    print('Output files:')
+    for blob in blob_list:
+        print(blob.name)
+    if not blob_list:
+        raise RuntimeError('No output found under ' + gcs_destination_uri)
+
+    # Process the first output file from GCS.
+    # Since we specified batch_size=2, the first response contains
+    # the first two pages of the input file.
+    json_string = blob_list[0].download_as_string()
+    response = json_format.Parse(
+        json_string, vision.types.AnnotateFileResponse())
+
+    # Print the full text of the first page. The annotation also holds
+    # pages/blocks/paragraphs/words/symbols with confidence scores and
+    # bounding boxes.
+    annotation = response.responses[0].full_text_annotation
+    print(u'Full text:\n{}'.format(annotation.text))
+# [END vision_async_detect_document_ocr]
+
+
 def run_local(args):
     if args.command == 'faces':
         detect_faces(args.path)
@@ -684,6 +760,8 @@ def run_uri(args):
         detect_document_uri(args.uri)
     elif args.command == 'web-geo-uri':
         web_entities_include_geo_results_uri(args.uri)
+    elif args.command == 'ocr-uri':
+        async_detect_document(args.uri, args.destination_uri)
 
 
 if __name__ == '__main__':
@@ -785,9 +863,14 @@ def run_uri(args):
         'document-uri', help=detect_document_uri.__doc__)
     document_uri_parser.add_argument('uri')
 
+    ocr_uri_parser = subparsers.add_parser(
+        'ocr-uri', help=async_detect_document.__doc__)
+    ocr_uri_parser.add_argument('uri')
+    ocr_uri_parser.add_argument('destination_uri')
+
     args = parser.parse_args()
 
-    if ('uri' in args.command):
+    if 'uri' in args.command:
         run_uri(args)
     else:
         run_local(args)
diff --git a/vision/cloud-client/detect/detect_test.py b/vision/cloud-client/detect/detect_test.py
@@ -14,9 +14,14 @@
 
 import os
 
+from google.cloud import storage
+
 import detect
 
 BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
+OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT'
+GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET)
+GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX)
 
 
 def test_labels(capsys):
@@ -271,3 +276,20 @@ def test_detect_crop_hints_http(capsys):
     detect.detect_crop_hints_uri(uri.format(BUCKET))
     out, _ = capsys.readouterr()
     assert 'bounds: (0,0)' in out
+
+
+def test_async_detect_document(capsys):
+    """Run the async OCR sample end to end and always clean up its output."""
+    bucket = storage.Client().get_bucket(BUCKET)
+    assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0
+    try:
+        detect.async_detect_document(
+            gcs_source_uri=GCS_SOURCE_URI,
+            gcs_destination_uri=GCS_DESTINATION_URI)
+        out, _ = capsys.readouterr()
+        assert 'Hodge conjecture' in out
+        assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 3
+    finally:
+        # Delete output even on failure so the next run starts empty.
+        for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
+            blob.delete()
diff --git a/vision/cloud-client/detect/requirements.txt b/vision/cloud-client/detect/requirements.txt
@@ -1,2 +1,2 @@
-google-cloud-vision==0.31.0
+google-cloud-vision==0.32.0
 google-cloud-storage==1.6.0