
Implementation of advanced search #206

Merged (22 commits, Mar 28, 2023)

Commits
cc1a2d9
Implement `Geocaching.advanced_search()` function
BelKed Mar 25, 2023
27c592b
Refactor the `Geocaching.search()` function using the `Geocaching.adv…
BelKed Mar 25, 2023
34a697f
Refactor the `Geocaching.search_rect()` function using the `Geocachin…
BelKed Mar 25, 2023
af54849
Formatting
BelKed Mar 25, 2023
73c00c7
Use a newer version of the search API as it supports more options
BelKed Mar 25, 2023
4c4ab05
Update the docstring for the `Geocaching.search()` method
BelKed Mar 25, 2023
4ee9707
Update the docstring for the `Geocaching.search_rect()` method
BelKed Mar 25, 2023
8d29fcf
Add docstring for the `Geocaching.advanced_search()` method
BelKed Mar 25, 2023
fe1e3df
Correct return type hints
BelKed Mar 25, 2023
a4d9f02
Fix the `origin` format
BelKed Mar 25, 2023
77b35d1
Update the test cassettes
BelKed Mar 25, 2023
ed91ead
Add tests for advanced search
BelKed Mar 25, 2023
243aa50
Sort the imports
BelKed Mar 25, 2023
55618ed
Bring the asterix back
BelKed Mar 25, 2023
b9375d6
Reorder the search method arguments in a more meaningful way
BelKed Mar 25, 2023
5da3bcb
Add another asterisk, now to the `Geocaching.search()` method
BelKed Mar 25, 2023
eba2d5b
Remove type definitions
BelKed Mar 27, 2023
6ae9ea2
Don't modify the `tolerance` value
BelKed Mar 27, 2023
baf8b3e
Update the value of the `expected` variable to match the current situ…
BelKed Mar 27, 2023
473521f
Update the test cassette
BelKed Mar 27, 2023
8c839f4
Move the asterisk to the correct place
BelKed Mar 28, 2023
cc0c92f
Test also the positional arguments
BelKed Mar 28, 2023
241 changes: 107 additions & 134 deletions pycaching/geocaching.py
@@ -7,14 +7,14 @@
import re
import subprocess
from os import path
from typing import Optional, Union
from typing import Generator, Optional, Union
from urllib.parse import parse_qs, urljoin, urlparse

import bs4
import requests
from bs4.element import Script # Direct import as `bs4.Script` requires version >= 4.9.1.

from pycaching.cache import Cache, Size, Status
from pycaching.cache import Cache
from pycaching.errors import Error, LoginFailedException, NotLoggedInException, PMOnlyException, TooManyRequestsError
from pycaching.geo import Point, Rectangle
from pycaching.log import Log
@@ -52,7 +52,7 @@ class Geocaching(object):
"search": "play/search",
"search_more": "play/search/more-results",
"my_logs": "my/logs.aspx",
"api_search": "api/proxy/web/search",
"api_search": "api/proxy/web/search/v2",
}
_credentials_file = ".gc_credentials"

@@ -243,116 +243,43 @@ def get_logged_user(self, login_page=None):
m = re.search(r'"username":\s*"(.*)"', js_content)
return m.group(1) if m else None

def search(self, point, limit=float("inf")):
"""Return a generator of caches around some point.

Search for caches around some point by loading search pages and parsing the data from these
pages. Yield :class:`.Cache` objects filled with data from search page. You can provide limit
as a convenient way to stop generator after certain number of caches.

:param .geo.Point point: Search center point.
:param int limit: Maximum number of caches to generate.
"""
logging.info("Searching at {}".format(point))

start_index = 0
while True:
# get one page
geocaches_table, whole_page = self._search_get_page(point, start_index)
rows = geocaches_table.find_all("tr")

# leave loop if there are no (more) results
if not rows:
return

# prepare language-dependent mappings
if start_index == 0:
cache_sizes_filter_wrapper = whole_page.find("div", class_="cache-sizes-wrapper")
localized_size_mapping = {
# key = "Small" (localized), value = Size.small
label.find("span").text.strip(): Size.from_number(label.find("input").get("value"))
for label in cache_sizes_filter_wrapper.find_all("label")
}

# parse caches in result
for start_index, row in enumerate(rows, start_index):

limit -= 1 # handle limit
if limit < 0:
return

# parse raw data
cache_details = row.find("span", "cache-details").text.split("|")
wp = cache_details[1].strip()

# create and fill cache object
# values are sanitized and converted in Cache setters
c = Cache(self, wp)
c.type = cache_details[0]
c.name = row.find("span", "cache-name").text
badge = row.find("svg", class_="badge")
c.found = "found" in str(badge) if badge is not None else False
c.favorites = row.find(attrs={"data-column": "FavoritePoint"}).text
if not (row.get("class") and "disabled" in row.get("class")):
c.status = Status.enabled
c.pm_only = row.find("td", "pm-upsell") is not None

if c.pm_only:
# PM only caches doesn't have other attributes filled in
yield c
continue

c.size = localized_size_mapping[row.find(attrs={"data-column": "ContainerSize"}).text.strip()]
c.difficulty = row.find(attrs={"data-column": "Difficulty"}).text
c.terrain = row.find(attrs={"data-column": "Terrain"}).text
c.hidden = row.find(attrs={"data-column": "PlaceDate"}).text
c.author = row.find("span", "owner").text[3:] # delete "by "

logging.debug("Cache parsed: {}".format(c))
yield c

start_index += 1

def _search_get_page(self, point, start_index):
"""Return one page for standard search as class:`bs4.BeautifulSoup` object.

:param .geo.Point point: Search center point.
:param int start_index: Determines the page. If start_index is greater than zero, this
method will use the AJAX endpoint, which is much faster.
def search(
self,
point: Point,
limit: int = float("inf"),
*,
sort_by: Union[str, SortOrder] = SortOrder.date_last_visited,
reverse: bool = False,
per_query: int = 200,
wait_sleep: bool = True,
) -> Generator[Optional[Cache], None, None]:
"""Search for caches around a specified location using a search API.

:param point: The :class:`.geo.Point` object representing the center point of the search.
:param limit: The maximum number of caches to load.
Defaults to infinity.
:param sort_by: The criterion to sort the caches by.
Defaults to :code:`SortOrder.date_last_visited`.
:param reverse: If :code:`True`, the order of the results is reversed.
Defaults to :code:`False`.
:param per_query: The number of caches to request in each query.
Defaults to :code:`200`.
:param wait_sleep: If rate limits are exceeded, wait the appropriate time when set
to :code:`True`; otherwise just yield :code:`None`.
Defaults to :code:`True`.
:return: A generator that yields :class:`.Cache` objects.
"""
assert hasattr(point, "format") and callable(point.format)
logging.debug("Loading page from start_index {}".format(start_index))

if start_index == 0:
# first request has to load normal search page
logging.debug("Using normal search endpoint")

# make request
res = self._request(
self._urls["search"],
params={
"origin": point.format_decimal(),
},
)
return res.find(id="geocaches"), res

else:
# other requests can use AJAX endpoint
logging.debug("Using AJAX search endpoint")

# make request
res = self._request(
self._urls["search_more"],
params={
"origin": point.format_decimal(),
"startIndex": start_index,
"ssvu": 2,
"selectAll": "false",
},
expect="json",
)

return bs4.BeautifulSoup(res["HtmlString"].strip(), "html.parser"), None
return self.advanced_search(
{
"origin": "{},{}".format(point.latitude, point.longitude),
"asc": str(not reverse).lower(),
"sort": sort_by.value,
},
per_query=per_query,
limit=limit,
wait_sleep=wait_sleep,
)
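
For illustration, a minimal usage sketch of the reworked search() method. It assumes an authenticated Geocaching instance; the login credentials, coordinates and limit below are placeholders, not values taken from this diff.

import pycaching
from pycaching.geo import Point

# Placeholder credentials; pycaching.login() returns a logged-in Geocaching instance.
gc = pycaching.login("username", "password")

# Yield up to 20 caches around the given point, using the default sort order
# (SortOrder.date_last_visited) and the default batch size of 200 per request.
for cache in gc.search(Point(50.08, 14.42), limit=20):
    print(cache)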

@deprecated
def search_quick(self, area):
@@ -366,55 +293,99 @@ def search_quick(self, area):

return self.search_rect(area)

# add some shortcuts ------------------------------------------------------

def search_rect(
self,
rect: Rectangle,
limit: int = float("inf"),
*,
per_query: int = 200,
sort_by: Union[str, SortOrder] = SortOrder.date_last_visited,
reverse: bool = False,
limit: int = float("inf"),
per_query: int = 200,
origin: Optional[Point] = None,
wait_sleep: bool = True
):
"""
Return a generator of caches in the given Rectangle area.

:param rect: Search area.
:param int per_query: Number of caches requested in single query.
:param sort_by: Order caches by the given criterion.
:param reverse: Reverse sort order.
:param limit: Maximum number of caches to return.
:param origin: Origin point for search by distance.
:param wait_sleep: In case of rate limits exceeding, wait appropriate time if set True,
otherwise just yield None.
wait_sleep: bool = True,
) -> Generator[Optional[Cache], None, None]:
"""Search for caches in a specified :class:`.Rectangle` area using a search API.

:param rect: The :class:`.Rectangle` object representing the search area.
:param limit: The maximum number of caches to load.
Defaults to infinity.
:param sort_by: The criterion to sort the caches by.
Defaults to :code:`SortOrder.date_last_visited`.
:param reverse: If :code:`True`, the order of the results is reversed.
Defaults to :code:`False`.
:param per_query: The number of caches to request in each query.
Defaults to :code:`200`.
:param origin: The origin point for search by distance, required when sorting by distance.
:param wait_sleep: If rate limits are exceeded, wait the appropriate time when set
to :code:`True`; otherwise just yield :code:`None`.
Defaults to :code:`True`.
:return: A generator that yields :class:`.Cache` objects.
"""

if not isinstance(sort_by, SortOrder):
sort_by = SortOrder(sort_by)

if limit <= 0:
return

take_amount = min(limit, per_query)
params = {
"box": "{},{},{},{}".format(
rect.corners[0].latitude,
rect.corners[0].longitude,
rect.corners[1].latitude,
rect.corners[1].longitude,
),
"take": take_amount,
"asc": str(not reverse).lower(),
"skip": 0,
"sort": sort_by.value,
}

if sort_by is SortOrder.distance:
assert isinstance(origin, Point)
params["origin"] = "{},{}".format(origin.latitude, origin.longitude)

return self.advanced_search(
params,
per_query=per_query,
limit=limit,
wait_sleep=wait_sleep,
)
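
A usage sketch for search_rect(), again assuming the gc instance from the earlier example; the Rectangle corners are placeholders, and the SortOrder import path is an assumption based on the names used in this diff.

from pycaching.geo import Point, Rectangle
from pycaching.sorting import SortOrder  # assumed import path for the SortOrder enum

# Search a rectangular area given by two corner Points, ordered by distance
# from an origin point (an origin is required when sorting by distance).
area = Rectangle(Point(50.00, 14.30), Point(50.15, 14.55))
origin = Point(50.08, 14.42)
for cache in gc.search_rect(area, limit=50, sort_by=SortOrder.distance, origin=origin):
    print(cache)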

def advanced_search(
self,
options: dict,
limit: int = float("inf"),
per_query: int = 200,
wait_sleep: bool = True,
) -> Generator[Optional[Cache], None, None]:
"""Perform an advanced search for geocaches with specific search criteria.

The search is performed using the options provided in the :code:`options` parameter.
Example of the :code:`options` parameter::

# https://www.geocaching.com/play/search?owner[0]=Geocaching%20HQ&a=0
options = {"owner[0]": "Geocaching HQ", "a": "0"}

:param options: A dictionary of search options.
:param limit: The maximum number of caches to load.
Defaults to infinity.
:param per_query: The number of caches to request in each query.
Defaults to :code:`200`.
:param wait_sleep: If rate limits are exceeded, wait the appropriate time when set
to :code:`True`; otherwise just yield :code:`None`.
Defaults to :code:`True`.
:return: A generator that yields :class:`.Cache` objects.
"""

if limit <= 0:
return

take_amount = min(limit, per_query)

params = options.copy()
params.update(
{
"take": take_amount,
"skip": 0,
}
)

total, offset = None, 0
while (offset < limit) and ((total is None) or (offset < total)):
params["skip"] = offset
@@ -434,6 +405,8 @@ def search_rect(
total = resp["total"]
offset += take_amount

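Building on the docstring example above, a short sketch of calling advanced_search() directly with raw search options; gc is the authenticated instance from the earlier sketches, and the owner filter and limit are illustrative only.

# The same owner filter as in the docstring example; limit caps the number of results.
for cache in gc.advanced_search({"owner[0]": "Geocaching HQ", "a": "0"}, limit=30):
    print(cache)
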
# add some shortcuts ------------------------------------------------------

def geocode(self, location):
"""Return a :class:`.Point` object from geocoded location.
