diff --git a/.cursorignore b/.cursorignore new file mode 100644 index 00000000..7664704b --- /dev/null +++ b/.cursorignore @@ -0,0 +1 @@ +*.bak \ No newline at end of file diff --git a/.data/divvy.jsonl.gz b/.data/divvy.jsonl.gz new file mode 100644 index 00000000..ba93c40f Binary files /dev/null and b/.data/divvy.jsonl.gz differ diff --git a/.data/wikipedia.jsonl.gz b/.data/wikipedia.jsonl.gz new file mode 100644 index 00000000..3c2c9f43 Binary files /dev/null and b/.data/wikipedia.jsonl.gz differ diff --git a/.gitignore b/.gitignore index 1e099b85..a2fe8681 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ www/transform/.gradle logstash/.gradle logstash/build theme.css +*.jsonl \ No newline at end of file diff --git a/data-extraction-report.txt b/data-extraction-report.txt new file mode 100644 index 00000000..ad1dd9d5 --- /dev/null +++ b/data-extraction-report.txt @@ -0,0 +1,12 @@ +Data Extraction Report +==================== + +Successfully extracted: +total 792864 +-rw-r--r--@ 1 rmdemp staff 349M May 14 15:28 divvy.jsonl +-rw-r--r--@ 1 rmdemp staff 34M May 14 15:28 divvy.jsonl.gz +-rw-r--r--@ 1 rmdemp staff 145K May 14 15:29 wikipedia.jsonl +-rw-r--r--@ 1 rmdemp staff 38K May 14 15:29 wikipedia.jsonl.gz + +Freebase index status: +The freebase index could not be loaded because it requires a synonyms file at /usr/share/elasticsearch/config/analysis/first_name.synonyms.txt which is missing in our container setup. diff --git a/docker-compose-elasticsearch.yml b/docker-compose-elasticsearch.yml.bak similarity index 100% rename from docker-compose-elasticsearch.yml rename to docker-compose-elasticsearch.yml.bak diff --git a/docker-compose.yml b/docker-compose.yml index 942a48b5..47498b46 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,84 +1,61 @@ -version: '2' +version: '3' services: - elastic1: - extends: - file: docker-compose-elasticsearch.yml - service: elasticsearch + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.18.0 + container_name: elasticsearch environment: - ES_NODENAME: elastic1 - ports: - - "9200:9200" - - "9300:9300" - elastic2: - extends: - file: docker-compose-elasticsearch.yml - service: elasticsearch - environment: - ES_NODENAME: elastic2 - ports: - - "9201:9200" - - "9301:9300" - depends_on: - - elastic1 - elastic3: - extends: - file: docker-compose-elasticsearch.yml - service: elasticsearch - environment: - ES_NODENAME: elastic3 - ports: - - "9202:9200" - - "9302:9300" - depends_on: - - elastic1 - inquisitor: - image: spantree/elasticsearch-inquisitor - environment: - ELASTICSEARCH_URL: "http://elastic1:9200" - ports: - - "9400:80" - exercises: - image: nginx:1.11-alpine + - discovery.type=single-node + - xpack.security.enabled=false + - "ES_JAVA_OPTS=-Xms4g -Xmx4g" + ulimits: + memlock: + soft: -1 + hard: -1 volumes: - - "./exercises:/usr/share/nginx/html" - - "./exercises/nginx:/etc/nginx" - ports: - - "9500:80" - logstash: - image: logstash:2.4 + - elasticsearch-data:/usr/share/elasticsearch/data + ports: + - 9200:9200 + - 9300:9300 + networks: + - elastic + + elasticsearch-2: + image: elasticsearch:2.4.5 + platform: linux/amd64 + container_name: elasticsearch-2 + environment: + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms1g -Xmx1g" + - ES_NODENAME=elasticsearch-2 + ulimits: + memlock: + soft: -1 + hard: -1 volumes: - - "./logstash:/config-dir" - - "./data:/usr/local/share/data" - command: ["logstash", "-f", "/config-dir/logstash.conf"] - ports: - - "3333:3333" - - "5044:5044" - - "9600:9600" - depends_on: - - 
elastic1 - - elastic2 - - elastic3 + - elasticsearch-2-data:/usr/share/elasticsearch/data + - ./es2-config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml + ports: + - 9201:9200 + - 9301:9300 + networks: + - elastic + kibana: - build: kibana + image: docker.elastic.co/kibana/kibana:8.18.0 + container_name: kibana ports: - - "5601:5601" + - 5601:5601 environment: - ELASTICSEARCH_URL: "http://elastic1:9200" - volumes: - - "./kibana/config:/opt/kibana/config" + - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 depends_on: - - elastic1 - - elastic2 - - elastic3 - slides: - build: slides - ports: - - "9000:9000" - - "35729:35729" # to enable live reloading - environment: - ELASTICSEARCH_URL: http://localhost:9200 - KIBANA_URL: http://localhost:5601 - INQUISITOR_URL: http://localhost:9400 - EXERCISES_URL: http://localhost:9500 - volumes: - - ./slides/slides:/usr/src/slides/slides + - elasticsearch + networks: + - elastic + +volumes: + elasticsearch-data: + elasticsearch-2-data: + +networks: + elastic: + driver: bridge \ No newline at end of file diff --git a/docker-compose.yml.bak b/docker-compose.yml.bak new file mode 100644 index 00000000..3fdd96b7 --- /dev/null +++ b/docker-compose.yml.bak @@ -0,0 +1,92 @@ +version: '2' +services: + elastic1: + platform: linux/amd64 + extends: + file: docker-compose-elasticsearch.yml + service: elasticsearch + environment: + ES_NODENAME: elastic1 + ports: + - "9200:9200" + - "9300:9300" + elastic2: + platform: linux/amd64 + extends: + file: docker-compose-elasticsearch.yml + service: elasticsearch + environment: + ES_NODENAME: elastic2 + ports: + - "9201:9200" + - "9301:9300" + depends_on: + - elastic1 + elastic3: + platform: linux/amd64 + extends: + file: docker-compose-elasticsearch.yml + service: elasticsearch + environment: + ES_NODENAME: elastic3 + ports: + - "9202:9200" + - "9302:9300" + depends_on: + - elastic1 + inquisitor: + platform: linux/amd64 + image: spantree/elasticsearch-inquisitor + environment: + ELASTICSEARCH_URL: "http://elastic1:9200" + ports: + - "9400:80" + exercises: + platform: linux/amd64 + image: nginx:1.11-alpine + volumes: + - "./exercises:/usr/share/nginx/html" + - "./exercises/nginx:/etc/nginx" + ports: + - "9500:80" + logstash: + platform: linux/amd64 + image: logstash:2.4 + volumes: + - "./logstash:/config-dir" + - "./data:/usr/local/share/data" + command: [ "logstash", "-f", "/config-dir/logstash.conf" ] + ports: + - "3333:3333" + - "5044:5044" + - "9600:9600" + depends_on: + - elastic1 + - elastic2 + - elastic3 + kibana: + platform: linux/amd64 + build: kibana + ports: + - "5601:5601" + environment: + ELASTICSEARCH_URL: "http://elastic1:9200" + volumes: + - "./kibana/config:/opt/kibana/config" + depends_on: + - elastic1 + - elastic2 + - elastic3 + slides: + platform: linux/amd64 + build: slides + ports: + - "9000:9000" + - "35729:35729" # to enable live reloading + environment: + ELASTICSEARCH_URL: http://localhost:9200 + KIBANA_URL: http://localhost:5601 + INQUISITOR_URL: http://localhost:9400 + EXERCISES_URL: http://localhost:9500 + volumes: + - ./slides/slides:/usr/src/slides/slides diff --git a/es2-config/elasticsearch.yml b/es2-config/elasticsearch.yml new file mode 100644 index 00000000..0836cb25 --- /dev/null +++ b/es2-config/elasticsearch.yml @@ -0,0 +1,40 @@ +network: + host: 0.0.0.0 +node: + name: ${ES_NODENAME} +index: + number_of_replicas: 0 +http: + max_content_length: 500mb +bootstrap: + mlockall: true +indices: + fielddata: + cache: + size: 25% +discovery: + zen: + ping: + unicast: + hosts: 
["localhost"] + multicast: + enabled: false + minimum_master_nodes: 1 +repositories: + url: + allowed_urls: ["https://elasticsearch-sample-data.s3.amazonaws.com/*"] +script: + engine: + groovy: + inline: + aggs: true + mapping: true + search: true + update: true + plugin: true + indexed: + aggs: true + mapping: true + search: true + update: true + plugin: true \ No newline at end of file diff --git a/exercises/aggregations.sense b/exercises/aggregations.sense index 0d9cc703..a1726907 100644 --- a/exercises/aggregations.sense +++ b/exercises/aggregations.sense @@ -3,16 +3,35 @@ # Query Divvy trips. # -GET /divvy/trip/_search +GET /divvy/_search +{ + "query": { + "term": { + "_index": "divvy" + } + } +} # Query Divvy stations # -GET /divvy/station/_search +GET /divvy/_search +{ + "query": { + "term": { + "_index": "divvy" + } + } +} # Get terms aggregation for gender of rider. # -GET /divvy/trip/_search +GET /divvy/_search { + "query": { + "term": { + "_index": "divvy" + } + }, "aggs": { "genders": { "terms": { @@ -25,8 +44,13 @@ GET /divvy/trip/_search # Get statistics for trip duration. # -GET /divvy/trip/_search +GET /divvy/_search { + "query": { + "term": { + "_index": "divvy" + } + }, "aggs": { "trip_duration_stats": { "stats": { @@ -39,8 +63,13 @@ GET /divvy/trip/_search # Get extended statistics for trip duration. # -GET /divvy/trip/_search +GET /divvy/_search { + "query": { + "term": { + "_index": "divvy" + } + }, "aggs": { "trip_duration_stats": { "extended_stats": { @@ -53,8 +82,13 @@ GET /divvy/trip/_search # Get trip duration percentiles. # -GET /divvy/trip/_search +GET /divvy/_search { + "query": { + "term": { + "_index": "divvy" + } + }, "aggs": { "trip_length": { "percentiles": { @@ -72,10 +106,12 @@ GET /divvy/trip/_search # Get trip duration stats by gender. # -GET /divvy/trip/_search +GET /divvy/_search { "query": { - "match_all": {} + "term": { + "_index": "divvy" + } }, "aggs": { "gender": { @@ -97,8 +133,13 @@ GET /divvy/trip/_search # Aggregate based on distance to Spantree's office. # -GET /divvy/station/_search +GET /divvy/_search { + "query": { + "term": { + "_index": "divvy" + } + }, "aggs": { "spantree_dist": { "geo_distance": { @@ -140,8 +181,13 @@ GET /divvy/station/_search # Get trip duration histogram. # -GET /divvy/trip/_search +GET /divvy/_search { + "query": { + "term": { + "_index": "divvy" + } + }, "aggs": { "trip_length": { "histogram": { @@ -156,13 +202,18 @@ GET /divvy/trip/_search # Get bike trips over time. We can also create histograms by date. # -GET /divvy/trip/_search +GET /divvy/_search { + "query": { + "term": { + "_index": "divvy" + } + }, "aggs": { "trips_over_time": { "date_histogram": { "field": "start_time", - "interval": "week" + "fixed_interval": "week" } } }, @@ -171,8 +222,13 @@ GET /divvy/trip/_search # Get oldest two movies by genre. 
# -GET /freebase/film/_search +GET /freebase/_search { + "query": { + "term": { + "_index": "freebase" + } + }, "aggs": { "top_genres": { "terms": { @@ -190,7 +246,7 @@ GET /freebase/film/_search } ], "_source": { - "include": [ + "includes": [ "name", "initial_release_date" ] @@ -206,8 +262,13 @@ GET /freebase/film/_search # Get top 3 directors by genre # -GET /freebase/film/_search +GET /freebase/_search { + "query": { + "term": { + "_index": "freebase" + } + }, "aggs": { "top_genres": { "terms": { @@ -217,7 +278,7 @@ GET /freebase/film/_search "aggs": { "top_directors": { "terms": { - "field": "directed_by.raw", + "field": "directed_by.keyword", "size": 3 } } diff --git a/exercises/getting-started.sense b/exercises/getting-started.sense index 1fc74579..926f27ef 100644 --- a/exercises/getting-started.sense +++ b/exercises/getting-started.sense @@ -7,12 +7,12 @@ GET / # Index a single document. Now, we will insert a single document -# into Elasticsearch. Note that we don't need to create an index or -# type, it gets created automatically if it doesn't already exist. +# into Elasticsearch. Note that we don't need to create an index +# as it gets created automatically if it doesn't already exist. # Elasticsearch will also try to guess the types for document fields # based on the initial JSON payload. # -PUT /getting-started/locations/frontera_grill +PUT /getting-started/_doc/frontera_grill { "name": "Frontera Grill", "url": "http://en.wikipedia.org/wiki/Frontera_Grill", @@ -27,7 +27,7 @@ PUT /getting-started/locations/frontera_grill # Fetch our document. You can retrieve a single document by its ID # with a simple HTTP GET request. # -GET /getting-started/locations/frontera_grill +GET /getting-started/_doc/frontera_grill # Finding all documents. We can also execute a request to get all # documents in this index. At this point, there should only be one. @@ -35,6 +35,6 @@ GET /getting-started/locations/frontera_grill GET /getting-started/_search # Review the mappings. We can also peek at the mappings Elasticsearch -# automatically generated for the location document type. +# automatically generated for the index. # -GET /getting-started/locations/_mapping +GET /getting-started/_mapping diff --git a/exercises/indexing.sense b/exercises/indexing.sense index 1439c429..b10931b7 100644 --- a/exercises/indexing.sense +++ b/exercises/indexing.sense @@ -3,7 +3,7 @@ # Index John Doe with an assigned ID. # -POST /spantree/people/ +POST /spantree/_doc/ { "name": "Johnny Noname" } @@ -18,7 +18,7 @@ GET /spantree/_mapping # Index Cedric with a known ID. # -PUT /spantree/people/cedric +PUT /spantree/_doc/cedric { "name": "Cedric Hurst", "title": "Principal" @@ -26,11 +26,11 @@ PUT /spantree/people/cedric # Make Sure Cedric is there. # -GET /spantree/people/cedric +GET /spantree/_doc/cedric # Add more information about Cedric. # -POST /spantree/people/cedric/_update +POST /spantree/_update/cedric { "doc": { "git_commits": 2560 @@ -39,15 +39,16 @@ POST /spantree/people/cedric/_update # Add one more git commit for Cedric. # -POST /spantree/people/cedric/_update +POST /spantree/_update/cedric { - "script": "ctx._source.git_commits += 1", - "lang": "groovy" + "script": { + "source": "ctx._source.git_commits += 1" + } } # Upsert Kevin. # -POST /spantree/people/kevin/_update +POST /spantree/_update/kevin { "doc": { "git_commits": 1912 @@ -61,14 +62,14 @@ POST /spantree/people/kevin/_update # Make sure Kevin is still there. # -GET /spantree/people/kevin +GET /spantree/_doc/kevin # Add everyone else. 
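# The _bulk body is newline-delimited JSON: each action line is immediately
# followed by its document. With mapping types gone, the action metadata only
# needs an _id; the target index comes from the request URL.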
# POST /spantree/_bulk -{"index":{"_id":"gary","_type": "people"}} +{"index":{"_id":"gary"}} {"name":"Gary Turovsky","title":"Senior Software Engineer","git_commits": 611} -{"index":{"_id":"jonathan","_type": "people"}} +{"index":{"_id":"jonathan"}} {"name":"Jonathan Freeman","title":"Software Engineer","git_commits": 186} # Review the whole list. diff --git a/exercises/more-like-this.sense b/exercises/more-like-this.sense index 4664c5b0..b4c69850 100644 --- a/exercises/more-like-this.sense +++ b/exercises/more-like-this.sense @@ -16,7 +16,6 @@ POST /wikipedia/_search "like" : [ { "_index" : "wikipedia", - "_type" : "locations", "_id" : "northwestern_university_settlement_house" } ], @@ -31,14 +30,13 @@ POST /wikipedia/_search # query. POST /wikipedia/_search { - "fields" : ["about"], + "_source" : ["about"], "query": { "more_like_this" : { "fields" : ["name", "about", "description"], "like" : [ { "_index" : "wikipedia", - "_type" : "locations", "_id" : "northwestern_university_settlement_house" } ], diff --git a/exercises/paging-and-sorting.sense b/exercises/paging-and-sorting.sense index c90bfcb3..44059eca 100644 --- a/exercises/paging-and-sorting.sense +++ b/exercises/paging-and-sorting.sense @@ -6,7 +6,7 @@ # GET /wikipedia/_search { - "fields": ["name", "coordinates"], + "_source": ["name", "coordinates"], "query": { "bool": { "must": [ @@ -26,7 +26,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": ["name", "coordinates"], + "_source": ["name", "coordinates"], "query": { "bool": { "must": [ @@ -38,41 +38,71 @@ GET /wikipedia/_search "from": 10 } -# Start a scan query. For frequently-changing data sets, it is often +# Start a point in time query. For frequently-changing data sets, it is often # difficult to keep search results consistent across pages. For # example, if a user is sorting results by freshness, a search result # once appeared in position 10 may be in position 11 by the time the # second page is requested. Elasticsearch has the ability to retain a -# previously fetched result set via a "scan query". This is similar -# to a JDBC cursor. +# previously fetched result set. # -# Note: make sure to copy the scroll id from the result because we're +# Note: make sure to copy the pit id from the result because we're # going to need it in the next step # -GET /wikipedia/_search?search_type=scan&scroll=10m&size=10 +POST /wikipedia/_pit?keep_alive=10m + +# Now use the pit id in a search request +# +GET /_search +{ + "pit": { + "id": "YOUR_PIT_ID", + "keep_alive": "10m" + }, + "_source": ["name", "coordinates"], + "query": { + "bool": { + "must": [ + {"query_string": {"query": "chicago"}} + ] + } + }, + "size": 10 +} + +# Access the next page using search_after +# +GET /_search { - "fields": ["name", "coordinates"], + "pit": { + "id": "YOUR_PIT_ID", + "keep_alive": "10m" + }, + "_source": ["name", "coordinates"], "query": { "bool": { "must": [ {"query_string": {"query": "chicago"}} ] } - } + }, + "size": 10, + "search_after": [YOUR_SORT_VALUES], + "sort": [{"_score": "desc"}, {"_id": "asc"}] } -## Continue our scan query. Use the field _scroll_id from the above -## query in the following query. Note: you will have to manually -## manipulate the query string with the scroll id from the last query. +# Clean up the PIT when done # -GET /_search/scroll?scroll=10m&scroll_id={scroll_id} +DELETE /_pit +{ + "id": "YOUR_PIT_ID" +} # Sort dates chronologically. Sorting documents matching "chicago" by # last update time. 
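# Date fields are stored in doc values as epoch milliseconds, so sorting on
# lastUpdated is a cheap numeric comparison rather than a string sort.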
# GET /wikipedia/_search { - "fields": ["name", "lastUpdated"], + "_source": ["name", "lastUpdated"], "query": { "bool": { "must": [ @@ -88,7 +118,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": ["name", "lastUpdated"], + "_source": ["name", "lastUpdated"], "query": { "bool": { "must": [{"query_string": {"query": "chicago"}}] @@ -102,7 +132,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": ["name", "coordinates"], + "_source": ["name", "coordinates"], "query": { "bool": { "must": [ @@ -131,9 +161,9 @@ GET /wikipedia/_search # inverted index. Because our name field is tokenized, the first # alphabetical token in the field value determines a field's ranking. # -GET /wikipedia/locations/_search +GET /wikipedia/_search { - "fields": ["name"], + "_source": ["name"], "query": { "bool": { "must": [{ @@ -156,21 +186,25 @@ GET /wikipedia/_settings?name=*.sort*.* # Store the sortable string in a multi-field. For the name field, we # want to do both full-text search and string sorting. So to make sure # we can handle both, we configure name to be a multi-field. -GET /wikipedia/locations/_mapping +GET /wikipedia/_mapping # Testing the sortable analyzer. When creating new analyzers, we # recommend testing them out using the Analyze API to make sure they # work as expected. # -GET /wikipedia/_analyze?field=name.sortable&text=I wanna know what love is; I want you to show me! +GET /wikipedia/_analyze +{ + "field": "name.sortable", + "text": "I wanna know what love is; I want you to show me!" +} # Sort Strings the Right Way. Now that we've done all that, we simply # swap out the sort field from the previous query to use -# `name.sorted`. +# `name.sortable`. -GET /wikipedia/locations/_search +GET /wikipedia/_search { - "fields": ["name"], + "_source": ["name"], "query": { "bool": { "must": [{ diff --git a/exercises/percolators.sense b/exercises/percolators.sense index 2a992c61..fa3bf13e 100644 --- a/exercises/percolators.sense +++ b/exercises/percolators.sense @@ -3,7 +3,7 @@ # Add a sample document. # -PUT /spantree/team/cedric +PUT /spantree/_doc/cedric { "drinks": ["Red Bull"] } @@ -18,73 +18,101 @@ PUT /spantree/team/cedric # process at Spantree, where we'd like to be notified if they modify # our scheduled amazon orders for various teas. # -# First, we will register a percolator for people who like to drink -# earl grey. +# First, we'll create a percolator mapping # -PUT /spantree/.percolator/earl_grey +PUT /percolate_queries { - "query" : { - "match" : { - "drinks": "earl grey" - } - } + "mappings": { + "properties": { + "query": { + "type": "percolator" + }, + "drinks": { + "type": "keyword" + } + } + } +} + +# Register a percolator for people who like to drink earl grey. +# +PUT /percolate_queries/_doc/earl_grey +{ + "query": { + "match": { + "drinks": "earl grey" + } + } } # Add Percolator for Russian Caravan. # -PUT /spantree/.percolator/russian_caravan +PUT /percolate_queries/_doc/russian_caravan { - "query" : { - "match" : { - "drinks": "russian caravan" - } - } + "query": { + "match": { + "drinks": "russian caravan" + } + } } # Onboard Justin with his drink preferences. 
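# With the percolator field type, onboarding is a regular search against
# percolate_queries: the candidate document is embedded in a percolate query
# and every stored query that matches it is returned as a hit.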
# -# -GET /spantree/people/_percolate +GET /percolate_queries/_search { - "doc" : { - "name": "Justin", - "drinks": [ - "Triple Espresso", - "Green Tea with Brown Rice", - "Coconut Water" - ] - } + "query": { + "percolate": { + "field": "query", + "document": { + "name": "Justin", + "drinks": [ + "Triple Espresso", + "Green Tea with Brown Rice", + "Coconut Water" + ] + } + } + } } # Onboard Kevin with his drink preferences. # -GET /spantree/people/_percolate +GET /percolate_queries/_search { - "doc" : { - "name": "Kevin", - "drinks": [ - "Sodastream Energy", - "Dark Magic Coffee", - "Earl Grey Tea" - ] - } + "query": { + "percolate": { + "field": "query", + "document": { + "name": "Kevin", + "drinks": [ + "Sodastream Energy", + "Dark Magic Coffee", + "Earl Grey Tea" + ] + } + } + } } # Review the earl grey percolator. # -GET /spantree/.percolator/earl_grey +GET /percolate_queries/_doc/earl_grey # Onboard Marija with her drink preferences. # -# -GET /spantree/people/_percolate +GET /percolate_queries/_search { - "doc": { - "name": "Marija", - "drinks": [ - "Earl Grey Tea", - "Russian Caravan Tea", - "Assam Tea" - ] - } + "query": { + "percolate": { + "field": "query", + "document": { + "name": "Marija", + "drinks": [ + "Earl Grey Tea", + "Russian Caravan Tea", + "Assam Tea" + ] + } + } + } } diff --git a/exercises/searching.sense b/exercises/searching.sense index dfc768e5..833d959a 100644 --- a/exercises/searching.sense +++ b/exercises/searching.sense @@ -61,7 +61,7 @@ GET /wikipedia/_search "query": "theater OR theatre" } }, - "fields": ["name", "keywords"] + "_source": ["name", "keywords"] } # Search with Lucene boolean syntax. Find both the terms "theater" and @@ -69,7 +69,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": ["name", "keywords", "description"], + "_source": ["name", "keywords", "description"], "query": { "query_string": { "fields": ["name", "keywords", "description"], @@ -82,7 +82,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": ["name", "keywords", "description"], + "_source": ["name", "keywords", "description"], "query": { "bool": { "must": [ @@ -109,7 +109,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": ["name", "keywords", "description"], + "_source": ["name", "keywords", "description"], "query": { "bool": { "must": [ @@ -133,7 +133,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": ["name", "keywords", "description"], + "_source": ["name", "keywords", "description"], "query": { "bool": { "should": [ @@ -158,7 +158,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": ["name", "description"], + "_source": ["name", "description"], "query": { "bool": { "must": [ @@ -181,7 +181,7 @@ GET /wikipedia/_search # Explain results. The explain endpoint will expose some of the math # behind filtering and scoring. 
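# In 8.x the explain API is addressed as GET /<index>/_explain/<id>, e.g.
# GET /wikipedia/_explain/chicago_shakespeare_theater.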
# -GET /wikipedia/locations/chicago_shakespeare_theater/_explain +GET /wikipedia/_doc/chicago_shakespeare_theater/_explain { "query": { "query_string": { @@ -199,7 +199,7 @@ GET /wikipedia/locations/chicago_shakespeare_theater/_explain # GET /wikipedia/_search { - "fields": ["name", "keywords", "about"], + "_source": ["name", "keywords", "about"], "query": { "query_string": { "fields": ["name^2", "keywords^1.5", "about"], @@ -213,7 +213,7 @@ GET /wikipedia/_search # GET /wikipedia/_search { - "fields": [ + "_source": [ "name", "keywords", "about" @@ -232,8 +232,9 @@ GET /wikipedia/_search } }, "script_score": { - "script": "_score * 2", - "lang": "groovy" + "script": { + "source": "_score * 2" + } } } } @@ -257,18 +258,26 @@ GET /wikipedia/_search # Filter on geo-distance. # -GET /divvy/station/_search +GET /divvy/_search { "query": { - "match_all": {} - }, - "filter": { - "geo_distance" : { - "distance": "1mi", - "location" : { - "lat": 41.886732, - "lon": -87.655979 - } + "bool": { + "filter": [ + { + "term": { + "_index": "divvy" + } + }, + { + "geo_distance": { + "distance": "1mi", + "location": { + "lat": 41.886732, + "lon": -87.655979 + } + } + } + ] } } } diff --git a/exercises/suggestions.sense b/exercises/suggestions.sense index ecec0166..6c003d8e 100644 --- a/exercises/suggestions.sense +++ b/exercises/suggestions.sense @@ -4,12 +4,14 @@ # Build a term suggester. These provide suggestions based on # Levenshtein (edit) distance. # -POST /wikipedia/_suggest +POST /wikipedia/_search { - "term_suggestion": { - "text": "buildng", - "term": { - "field": "description" + "suggest": { + "term_suggestion": { + "text": "buildng", + "term": { + "field": "description" + } } } } @@ -18,30 +20,32 @@ POST /wikipedia/_suggest # suggesters to return entire corrected phrases based on a sequence # of words in the corpus (ngrams). # -POST /wikipedia/_suggest +POST /wikipedia/_search { - "text": "high risk buildng", - "simple_phrase": { - "phrase": { - "field": "description", - "gram_size": 2, - "real_word_error_likelihood": 0.95, - "confidence": 1, - "max_errors": 100, - "size": 1, - "analyzer": "standard", - "shard_size": 5, - "direct_generator": [ - { - "field": "description", - "suggest_mode": "popular", - "max_edits": 2, - "min_word_len": 4, - "max_inspections": 5, - "min_doc_freq": 0, - "max_term_freq": 0 - } - ] + "suggest": { + "simple_phrase": { + "text": "high risk buildng", + "phrase": { + "field": "description", + "gram_size": 2, + "real_word_error_likelihood": 0.95, + "confidence": 1, + "max_errors": 100, + "size": 1, + "analyzer": "standard", + "shard_size": 5, + "direct_generator": [ + { + "field": "description", + "suggest_mode": "popular", + "max_edits": 2, + "min_word_len": 4, + "max_inspections": 5, + "min_doc_freq": 0, + "max_term_freq": 0 + } + ] + } } } } @@ -53,72 +57,72 @@ POST /wikipedia/_suggest # to assign weights and priorities to different options. # PUT /suggestions - -# Create completion suggestion mapping. 
-# -PUT /suggestions/suggestion/_mapping { - "suggestion" : { - "properties" : { - "name" : { "type" : "string" }, - "suggest" : { - "type" : "completion", - "analyzer" : "simple", - "search_analyzer" : "simple", - "payloads" : true - } - } + "mappings": { + "properties": { + "name": { + "type": "text" + }, + "suggest": { + "type": "completion", + "analyzer": "simple", + "search_analyzer": "simple", + "preserve_position_increments": true + } } + } } # Create a completion suggestion for Cedric # -PUT /suggestions/suggestion/1 +PUT /suggestions/_doc/1 { - "name" : "Cedric Hurst", - "suggest" : { - "input": [ "Cedster", "The Ced", "C-Man", "That guy from that one meetup", "Software Engineer" ], - "output": "Cedric", - "payload" : { "title" : "Principal" }, - "weight" : 37 - } + "name": "Cedric Hurst", + "suggest": { + "input": [ "Cedster", "The Ced", "C-Man", "That guy from that one meetup", "Software Engineer" ], + "weight": 37 + }, + "title": "Principal" } # Create a completion suggestion for Kevin. # -PUT /suggestions/suggestion/2 +PUT /suggestions/_doc/2 { - "name" : "Kevin Greene", - "suggest" : { - "input": [ "Kev", "KG", "Greene", "Michigan", "Software Engineer" ], - "output": "Kevin", - "payload" : { "title" : "Senior Software Engineer" }, - "weight" : 101 - } + "name": "Kevin Greene", + "suggest": { + "input": [ "Kev", "KG", "Greene", "Michigan", "Software Engineer" ], + "weight": 101 + }, + "title": "Senior Software Engineer" } # Get a completion suggestion. # -POST /suggestions/_suggest +POST /suggestions/_search { - "completion_suggestion": { - "text": "software", - "completion": { - "field": "suggest" + "suggest": { + "completion_suggestion": { + "prefix": "software", + "completion": { + "field": "suggest" + } } } } # Get a fuzzy completion suggestion. # -POST /suggestions/_suggest +POST /suggestions/_search { - "completion_suggestion": { - "text": "mchgan", - "completion": { - "field": "suggest", - "fuzzy" : { - "fuzziness" : 2 + "suggest": { + "completion_suggestion": { + "prefix": "mchgan", + "completion": { + "field": "suggest", + "fuzzy": { + "fuzziness": 2 + } } } } @@ -130,85 +134,84 @@ POST /suggestions/_suggest # You may want to boost suggestions for people who work in your same # country or office. Context suggesters allow you to combine the two. 
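# In 8.x geo contexts are declared through the contexts option on the
# completion field. An existing field's mapping cannot be changed in place,
# so the context-enabled mapping below assumes the suggestions index is
# deleted and recreated first.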
# -PUT /suggestions/conference/_mapping +PUT /suggestions { - "conference": { - "properties": { - "name": { - "type": "string" - }, - "suggestion": { - "type": "completion", - "context": { - "location": { - "type": "geo", - "precision": "500km", - "neighbors": true, - "default": "u33" - } - } - } + "mappings": { + "properties": { + "name": { + "type": "text" + }, + "suggestion": { + "type": "completion", + "contexts": { + "location": { + "type": "geo", + "precision": "500km", + "neighbors": true + } } + } } + } } ## Create GOTO Chicago conference suggestion # -PUT /suggestions/conference/goto_chicago +PUT /suggestions/_doc/goto_chicago { - "name": "GOTO Chicago", - "suggestion": { - "input": [ - "goto", - "chicago", - "tech conference" - ], - "output": "GOTO Chicago", - "context": { - "location": { - "lat": 41.8927539, - "lon": -87.6191727 - } - } + "name": "GOTO Chicago", + "suggestion": { + "input": [ + "goto", + "chicago", + "tech conference" + ], + "contexts": { + "location": { + "lat": 41.8927539, + "lon": -87.6191727 + } } + } } # Create Strangeloop context suggestion # -PUT /suggestions/conference/strangeloop +PUT /suggestions/_doc/strangeloop { - "name": "Strangeloop Conference", - "suggestion": { - "input": [ - "strange", - "loop", - "tech conference" - ], - "output": "Strangeloop Conference", - "context": { - "location": { - "lat": 38.6537065, - "lon": -90.2477908 - } - } + "name": "Strangeloop Conference", + "suggestion": { + "input": [ + "strange", + "loop", + "tech conference" + ], + "contexts": { + "location": { + "lat": 38.6537065, + "lon": -90.2477908 + } } + } } # Suggest nearby nerdy things when in St Louis. # -POST suggestions/_suggest +POST suggestions/_search { + "suggest": { "context_suggestion": { - "text": "tech", - "completion": { - "field": "suggestion", - "size": 10, - "context": { - "location": { - "lat": 39.626072, - "lon": -90.0769822 - } - } + "prefix": "tech", + "completion": { + "field": "suggestion", + "size": 10, + "contexts": { + "location": { + "lat": 39.626072, + "lon": -90.0769822 + } } + } } + } } diff --git a/extract-data.sh b/extract-data.sh new file mode 100755 index 00000000..df49710d --- /dev/null +++ b/extract-data.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +set -o errexit # exit when a command fails +set -o pipefail # exit if pipe fails +set -o nounset # exit when script tries to use undeclared variables + +ES_HOST="http://localhost:9201" +OUTPUT_DIR=".data" +SCROLL_TIME="1m" +BATCH_SIZE=1000 + +# Make sure output directory exists +mkdir -p "$OUTPUT_DIR" + +# Get list of indices +INDICES=$(curl -s "${ES_HOST}/_cat/indices?h=index" | grep -v "freebase" | tr -d ' ') + +for INDEX in $INDICES; do + echo "Extracting data from index: $INDEX" + OUTPUT_FILE="${OUTPUT_DIR}/${INDEX}.jsonl" + + # Initialize the scroll + SCROLL_ID=$(curl -s -X POST "${ES_HOST}/${INDEX}/_search?scroll=${SCROLL_TIME}" -d '{ + "size": '${BATCH_SIZE}', + "sort": ["_doc"], + "query": {"match_all": {}} + }' | jq -r '._scroll_id') + + # Get the first batch of results + RESULTS=$(curl -s -X POST "${ES_HOST}/_search/scroll" -d '{ + "scroll": "'${SCROLL_TIME}'", + "scroll_id": "'${SCROLL_ID}'" + }') + + # Extract hits + echo "$RESULTS" | jq -c '.hits.hits[]._source' > "$OUTPUT_FILE" + + # Continue scrolling until no more hits + TOTAL_HITS=$(echo "$RESULTS" | jq '.hits.total') + HITS_COUNT=$(echo "$RESULTS" | jq '.hits.hits | length') + + while [ "$HITS_COUNT" -gt 0 ]; do + echo "Processed $HITS_COUNT documents from $INDEX..." 
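    # Each scroll response can return a fresh _scroll_id, so it is re-captured
    # below before the next page is requested.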
+ + # Get the next batch of results + RESULTS=$(curl -s -X POST "${ES_HOST}/_search/scroll" -d '{ + "scroll": "'${SCROLL_TIME}'", + "scroll_id": "'${SCROLL_ID}'" + }') + + # Extract hits and append to file + echo "$RESULTS" | jq -c '.hits.hits[]._source' >> "$OUTPUT_FILE" + + # Update scroll ID and counts + SCROLL_ID=$(echo "$RESULTS" | jq -r '._scroll_id') + HITS_COUNT=$(echo "$RESULTS" | jq '.hits.hits | length') + done + + # Clean up the scroll + curl -s -X DELETE "${ES_HOST}/_search/scroll" -d '{ + "scroll_id": ["'${SCROLL_ID}'"] + }' > /dev/null + + echo "Completed extracting data from $INDEX" + echo "Data saved to $OUTPUT_FILE" + echo +done + +echo "All data extraction complete!" \ No newline at end of file diff --git a/extract-freebase.sh b/extract-freebase.sh new file mode 100755 index 00000000..b46acccc --- /dev/null +++ b/extract-freebase.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash + +set -o errexit # exit when a command fails +set -o pipefail # exit if pipe fails +set -o nounset # exit when script tries to use undeclared variables + +ES_HOST="http://localhost:9201" +OUTPUT_DIR=".data" +OUTPUT_FILE="${OUTPUT_DIR}/freebase.jsonl" +SCROLL_TIME="1m" +BATCH_SIZE=1000 +MAX_ATTEMPTS=5 + +mkdir -p "$OUTPUT_DIR" + +# Function to check if we got valid JSON response +function is_valid_json() { + if echo "$1" | jq -e . >/dev/null 2>&1; then + return 0 + else + return 1 + fi +} + +echo "Attempting to extract data from freebase index..." + +# Try to get a count of documents +COUNT_RESPONSE=$(curl -s "${ES_HOST}/freebase/_count") +if is_valid_json "$COUNT_RESPONSE" && [[ $(echo "$COUNT_RESPONSE" | jq -r '.count // 0') -gt 0 ]]; then + DOC_COUNT=$(echo "$COUNT_RESPONSE" | jq -r '.count') + echo "Found $DOC_COUNT documents in freebase index" +else + echo "Warning: Unable to get document count. Will attempt extraction anyway." +fi + +# Try to get some data +for attempt in $(seq 1 $MAX_ATTEMPTS); do + echo "Attempt $attempt of $MAX_ATTEMPTS to extract data..." + + # Try to get a small batch of data + RESPONSE=$(curl -s "${ES_HOST}/freebase/_search?size=10" -d '{ + "query": {"match_all": {}}, + "sort": ["_doc"] + }') + + if is_valid_json "$RESPONSE" && [[ $(echo "$RESPONSE" | jq -r '.hits.hits | length') -gt 0 ]]; then + echo "Successfully retrieved documents. Starting full extraction..." + break + fi + + echo "No data found or encountered an error. Waiting 5 seconds before retrying..." + sleep 5 + + if [[ $attempt -eq $MAX_ATTEMPTS ]]; then + echo "Failed to extract data after $MAX_ATTEMPTS attempts." + echo "The index may still be initializing or is empty." + exit 1 + fi +done + +# Initialize output file +> "$OUTPUT_FILE" + +# Initialize the scroll +echo "Initializing scroll for bulk extraction..." +SCROLL_RESPONSE=$(curl -s -X POST "${ES_HOST}/freebase/_search?scroll=${SCROLL_TIME}&ignore_unavailable=true" -d '{ + "size": '${BATCH_SIZE}', + "sort": ["_doc"], + "query": {"match_all": {}} +}') + +if ! is_valid_json "$SCROLL_RESPONSE"; then + echo "Error: Invalid response from Elasticsearch." + echo "Response: $SCROLL_RESPONSE" + exit 1 +fi + +# Check if we got any hits at all +HITS_COUNT=$(echo "$SCROLL_RESPONSE" | jq '.hits.hits | length') +if [[ $HITS_COUNT -eq 0 ]]; then + echo "No documents found in the freebase index." 
+ exit 0 +fi + +# Extract hits +echo "$SCROLL_RESPONSE" | jq -c '.hits.hits[]._source' >> "$OUTPUT_FILE" +SCROLL_ID=$(echo "$SCROLL_RESPONSE" | jq -r '._scroll_id') + +# Continue scrolling until no more hits +TOTAL_EXTRACTED=0 +EXTRACTED_THIS_BATCH=$HITS_COUNT +((TOTAL_EXTRACTED += EXTRACTED_THIS_BATCH)) + +echo "Extracted $EXTRACTED_THIS_BATCH documents. Continuing..." + +while [[ $EXTRACTED_THIS_BATCH -gt 0 ]]; do + # Get the next batch of results + SCROLL_RESPONSE=$(curl -s -X POST "${ES_HOST}/_search/scroll" -d '{ + "scroll": "'${SCROLL_TIME}'", + "scroll_id": "'${SCROLL_ID}'" + }') + + if ! is_valid_json "$SCROLL_RESPONSE"; then + echo "Error: Invalid response from scroll request." + break + fi + + # Extract hits and append to file + EXTRACTED_THIS_BATCH=$(echo "$SCROLL_RESPONSE" | jq '.hits.hits | length') + + if [[ $EXTRACTED_THIS_BATCH -gt 0 ]]; then + echo "$SCROLL_RESPONSE" | jq -c '.hits.hits[]._source' >> "$OUTPUT_FILE" + SCROLL_ID=$(echo "$SCROLL_RESPONSE" | jq -r '._scroll_id') + ((TOTAL_EXTRACTED += EXTRACTED_THIS_BATCH)) + echo "Extracted $EXTRACTED_THIS_BATCH more documents. Total: $TOTAL_EXTRACTED" + fi +done + +# Clean up the scroll +curl -s -X DELETE "${ES_HOST}/_search/scroll" -d '{ + "scroll_id": ["'${SCROLL_ID}'"] +}' > /dev/null + +# Check the file size +FILE_SIZE=$(wc -c < "$OUTPUT_FILE") +LINE_COUNT=$(wc -l < "$OUTPUT_FILE") + +echo "Extraction completed." +echo "Extracted $TOTAL_EXTRACTED documents from freebase index." +echo "Output file size: $FILE_SIZE bytes" +echo "Line count: $LINE_COUNT" + +# Compress the file +if [[ $FILE_SIZE -gt 0 ]]; then + gzip -k "$OUTPUT_FILE" + COMPRESSED_SIZE=$(wc -c < "${OUTPUT_FILE}.gz") + echo "Compressed file size: $COMPRESSED_SIZE bytes" + echo "Compression ratio: $(echo "scale=2; $COMPRESSED_SIZE * 100 / $FILE_SIZE" | bc)%" +else + echo "Output file is empty. Skipping compression." +fi + +echo "Extraction process complete." \ No newline at end of file diff --git a/extract-wikipedia.sh b/extract-wikipedia.sh new file mode 100755 index 00000000..f2869f2b --- /dev/null +++ b/extract-wikipedia.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -o errexit # exit when a command fails +set -o pipefail # exit if pipe fails +set -o nounset # exit when script tries to use undeclared variables + +ES_HOST="http://localhost:9201" +OUTPUT_DIR=".data" +OUTPUT_FILE="${OUTPUT_DIR}/wikipedia.jsonl" + +mkdir -p "$OUTPUT_DIR" + +# Get all documents from the wikipedia index with a single query +curl -s -X GET "${ES_HOST}/wikipedia/_search?size=100" -d '{ + "query": {"match_all": {}} +}' | jq -c '.hits.hits[]._source' > "$OUTPUT_FILE" + +echo "Data saved to $OUTPUT_FILE" +wc -l "$OUTPUT_FILE" \ No newline at end of file diff --git a/load-sample-snapshots.sh b/load-sample-snapshots.sh index e30f59e8..b27d642a 100755 --- a/load-sample-snapshots.sh +++ b/load-sample-snapshots.sh @@ -5,17 +5,18 @@ set -o errexit # exit when a command fails. 
set -o nounset # exit when your script tries to use undeclared variables REPOSITORY_NAME=sample_readonly +ES_HOST=http://localhost:9201 __dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Create a read-only snapshot repository to pull files from S3 bucket via HTTPs -curl -X PUT -d '{"type": "url", "settings": {"url": "https://elasticsearch-sample-data.s3.amazonaws.com/"}}' "http://localhost:9200/_snapshot/${REPOSITORY_NAME}" +curl -X PUT -d '{"type": "url", "settings": {"url": "https://elasticsearch-sample-data.s3.amazonaws.com/"}}' "${ES_HOST}/_snapshot/${REPOSITORY_NAME}" # Delete any existing indices -curl -X DELETE "http://localhost:9200/*" +curl -X DELETE "${ES_HOST}/*" # Read snapshots to restore from manifest file for SNAPSHOT_NAME in $(cat ./snapshot-manifest); do # Restore snapshots with one replica - time curl -X POST -d '{"index_settings": {"index.number_of_replicas": 1}}' "http://localhost:9200/_snapshot/${REPOSITORY_NAME}/${SNAPSHOT_NAME}/_restore?wait_for_completion=true" + time curl -X POST -d '{"index_settings": {"index.number_of_replicas": 1}}' "${ES_HOST}/_snapshot/${REPOSITORY_NAME}/${SNAPSHOT_NAME}/_restore?wait_for_completion=true" done
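One step the extraction scripts leave open is getting the exported data back into the 8.18 node on port 9200. A minimal sketch of that reload, assuming each target index is named after its archive, dynamic mapping is acceptable, and an arbitrary 5,000-document chunk keeps every request under the default http.max_content_length:

#!/usr/bin/env bash
# Hypothetical reload helper (not part of the change above): bulk-load the
# extracted .data/*.jsonl.gz files into the Elasticsearch 8.18 node.

set -o errexit
set -o pipefail
set -o nounset

ES_HOST="http://localhost:9200"   # assumed: the 8.18 node from docker-compose.yml

for ARCHIVE in .data/*.jsonl.gz; do
  INDEX=$(basename "$ARCHIVE" .jsonl.gz)   # assumed: index named after the file
  echo "Loading $ARCHIVE into index $INDEX"

  # Emit a two-line bulk entry per document and split into ~5,000-doc chunks
  # so each request stays well under the default http.max_content_length.
  gunzip -c "$ARCHIVE" \
    | awk '{print "{\"index\":{}}"; print}' \
    | split -l 10000 -a 4 - /tmp/bulk-chunk-

  for CHUNK in /tmp/bulk-chunk-*; do
    curl -s -H "Content-Type: application/x-ndjson" \
      -X POST "${ES_HOST}/${INDEX}/_bulk" \
      --data-binary "@${CHUNK}" > /dev/null
    rm "$CHUNK"
  done

  # Make the documents searchable and report how many landed.
  curl -s -X POST "${ES_HOST}/${INDEX}/_refresh" > /dev/null
  curl -s "${ES_HOST}/${INDEX}/_count"
  echo
done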