diff --git a/xrspatial/proximity.py b/xrspatial/proximity.py index 11ab5246..a2b9664f 100755 --- a/xrspatial/proximity.py +++ b/xrspatial/proximity.py @@ -224,16 +224,41 @@ def great_circle_distance( @ngjit def _distance(x1, x2, y1, y2, metric): + if metric == EUCLIDEAN: - return euclidean_distance(x1, x2, y1, y2) + d = euclidean_distance(x1, x2, y1, y2) - if metric == GREAT_CIRCLE: - return great_circle_distance(x1, x2, y1, y2) + elif metric == GREAT_CIRCLE: + d = great_circle_distance(x1, x2, y1, y2) - if metric == MANHATTAN: - return manhattan_distance(x1, x2, y1, y2) + else: + # metric == MANHATTAN: + d = manhattan_distance(x1, x2, y1, y2) - return -1.0 + return np.float32(d) + + +@ngjit +def _calc_direction(x1, x2, y1, y2): + # Calculate direction from (x1, y1) to a source cell (x2, y2). + # The output values are based on compass directions, + # 90 to the east, 180 to the south, 270 to the west, and 360 to the north, + # with 0 reserved for the source cell itself + + if x1 == x2 and y1 == y2: + return 0 + + x = x2 - x1 + y = y2 - y1 + d = np.arctan2(-y, x) * 57.29578 + if d < 0: + d = 90.0 - d + elif d > 90.0: + d = 360.0 - d + 90.0 + else: + d = 90.0 - d + + return np.float32(d) @ngjit @@ -269,9 +294,9 @@ def _process_proximity_line( width : np.int64 Image width. It is the number of pixels in the `source_line`. - max_distance : np.float64, maximum distance considered. + max_distance : np.float32, maximum distance considered. line_proximity : numpy.array - 1d numpy array of type np.float64, calculated proximity from + 1d numpy array of type np.float32, calculated proximity from source_line. values : numpy.array 1d numpy array. A list of target pixel values @@ -281,7 +306,7 @@ def _process_proximity_line( Returns ------- self: numpy.array - 1d numpy array of type np.float64. Corresponding proximity of + 1d numpy array of type np.float32. Corresponding proximity of source_line. """ start = width - 1 @@ -376,28 +401,6 @@ def _process_proximity_line( return -@ngjit -def _calc_direction(x1, x2, y1, y2): - # Calculate direction from (x1, y1) to a source cell (x2, y2). - # The output values are based on compass directions, - # 90 to the east, 180 to the south, 270 to the west, and 360 to the north, - # with 0 reserved for the source cell itself - - if x1 == x2 and y1 == y2: - return 0 - - x = x2 - x1 - y = y2 - y1 - d = np.arctan2(-y, x) * 57.29578 - if d < 0: - d = 90.0 - d - elif d > 90.0: - d = 360.0 - d + 90.0 - else: - d = 90.0 - d - return d - - def _process( raster, x, @@ -407,6 +410,7 @@ def _process( distance_metric, process_mode ): + raster_dims = raster.dims if raster_dims != (y, x): raise ValueError( @@ -428,6 +432,10 @@ def _process( if max_distance is None: max_distance = np.inf + max_possible_distance = _distance( + xs[0][0], xs[-1][-1], ys[0][0], ys[-1][-1], distance_metric + ) + @ngjit def _process_numpy(img, x_coords, y_coords): height, width = img.shape @@ -435,9 +443,8 @@ def _process_numpy(img, x_coords, y_coords): pan_near_y = np.zeros(width, dtype=np.int64) # output of the function - img_distance = np.zeros(shape=(height, width), dtype=np.float64) - img_allocation = np.zeros(shape=(height, width), dtype=np.float64) - img_direction = np.zeros(shape=(height, width), dtype=np.float64) + output_img = np.zeros(shape=(height, width), dtype=np.float32) + img_distance = np.zeros(shape=(height, width), dtype=np.float32) # Loop from top to bottom of the image. for i in prange(width): @@ -456,7 +463,7 @@ def _process_numpy(img, x_coords, y_coords): for i in prange(width): scan_line[i] = img[line][i] - line_proximity = np.zeros(width, dtype=np.float64) + line_proximity = np.zeros(width, dtype=np.float32) for i in prange(width): line_proximity[i] = -1.0 @@ -473,14 +480,15 @@ def _process_numpy(img, x_coords, y_coords): ) for i in prange(width): if nearest_xs[i] != -1 and line_proximity[i] >= 0: - img_allocation[line][i] = img[nearest_ys[i], nearest_xs[i]] - d = _calc_direction( - x_coords[line, i], - x_coords[nearest_ys[i], nearest_xs[i]], - y_coords[line, i], - y_coords[nearest_ys[i], nearest_xs[i]], - ) - img_direction[line][i] = d + if process_mode == ALLOCATION: + output_img[line][i] = img[nearest_ys[i], nearest_xs[i]] + elif process_mode == DIRECTION: + output_img[line][i] = _calc_direction( + x_coords[line, i], + x_coords[nearest_ys[i], nearest_xs[i]], + y_coords[line, i], + y_coords[nearest_ys[i], nearest_xs[i]], + ) # right to left for i in prange(width): @@ -497,15 +505,18 @@ def _process_numpy(img, x_coords, y_coords): for i in prange(width): img_distance[line][i] = line_proximity[i] + if nearest_xs[i] != -1 and line_proximity[i] >= 0: - img_allocation[line][i] = img[nearest_ys[i], nearest_xs[i]] - d = _calc_direction( - x_coords[line, i], - x_coords[nearest_ys[i], nearest_xs[i]], - y_coords[line, i], - y_coords[nearest_ys[i], nearest_xs[i]], - ) - img_direction[line][i] = d + if process_mode == ALLOCATION: + output_img[line][i] = img[nearest_ys[i], nearest_xs[i]] + + elif process_mode == DIRECTION: + output_img[line][i] = _calc_direction( + x_coords[line, i], + x_coords[nearest_ys[i], nearest_xs[i]], + y_coords[line, i], + y_coords[nearest_ys[i], nearest_xs[i]], + ) # Loop from bottom to top of the image. for i in prange(width): @@ -536,15 +547,16 @@ def _process_numpy(img, x_coords, y_coords): for i in prange(width): if nearest_xs[i] != -1 and line_proximity[i] >= 0: - img_allocation[line][i] = img[nearest_ys[i], nearest_xs[i]] - d = _calc_direction( - x_coords[line, i], - x_coords[nearest_ys[i], nearest_xs[i]], - y_coords[line, i], - y_coords[nearest_ys[i], nearest_xs[i]], - ) + if process_mode == ALLOCATION: + output_img[line][i] = img[nearest_ys[i], nearest_xs[i]] - img_direction[line][i] = d + elif process_mode == DIRECTION: + output_img[line][i] = _calc_direction( + x_coords[line, i], + x_coords[nearest_ys[i], nearest_xs[i]], + y_coords[line, i], + y_coords[nearest_ys[i], nearest_xs[i]], + ) # Left to right for i in prange(width): @@ -565,35 +577,40 @@ def _process_numpy(img, x_coords, y_coords): line_proximity[i] = np.nan else: if nearest_xs[i] != -1 and line_proximity[i] >= 0: - img_allocation[line][i] = img[ - nearest_ys[i], nearest_xs[i]] - d = _calc_direction( - x_coords[line, i], - x_coords[nearest_ys[i], nearest_xs[i]], - y_coords[line, i], - y_coords[nearest_ys[i], nearest_xs[i]], - ) - img_direction[line][i] = d + if process_mode == ALLOCATION: + output_img[line][i] = img[ + nearest_ys[i], nearest_xs[i]] + + elif process_mode == DIRECTION: + output_img[line][i] = _calc_direction( + x_coords[line, i], + x_coords[nearest_ys[i], nearest_xs[i]], + y_coords[line, i], + y_coords[nearest_ys[i], nearest_xs[i]], + ) for i in prange(width): img_distance[line][i] = line_proximity[i] if process_mode == PROXIMITY: return img_distance - elif process_mode == ALLOCATION: - return img_allocation - elif process_mode == DIRECTION: - return img_direction + else: + return output_img def _process_dask(raster, xs, ys): - # calculate padding for width and height - if not np.isfinite(max_distance): + + if max_distance >= max_possible_distance: # consider all targets in the whole raster + # the data array is computed at once, + # make sure your data fit your memory height, width = raster.shape - pad_y = height - 1 - pad_x = width - 1 + raster.data = raster.data.rechunk({0: height, 1: width}) + xs = xs.rechunk({0: height, 1: width}) + ys = ys.rechunk({0: height, 1: width}) + pad_y = pad_x = 0 else: cellsize_x, cellsize_y = get_dataarray_resolution(raster) + # calculate padding for each chunk pad_y = int(max_distance / cellsize_y + 0.5) pad_x = int(max_distance / cellsize_x + 0.5) @@ -626,7 +643,7 @@ def proximity( x: str = "x", y: str = "y", target_values: list = [], - max_distance: float = None, + max_distance: float = np.inf, distance_metric: str = "EUCLIDEAN", ) -> xr.DataArray: """ @@ -657,20 +674,37 @@ def proximity( ---------- raster : xr.DataArray 2D array image with `raster.shape` = (height, width). + x : str, default='x' Name of x-coordinates. + y : str, default='y' Name of y-coordinates. + target_values: list Target pixel values to measure the distance from. If this option is not provided, proximity will be computed from non-zero pixel values. - max_distance: float + + max_distance: float, default=np.inf The maximum distance to search. Proximity distances greater than - this value will be set to a NaN value. - Note that this must be given in the same unit as input. For example, - if input raster is in latitude/longitude units, `max_distance` should - also be provided in latitude/longitude units. + this value will be set to NaN. + Should be given in the same distance unit as input. + For example, if input raster is in lat-lon and distances between points + within the raster is calculated using Euclidean distance metric, + `max_distance` should also be provided in lat-lon unit. + If using Great Circle distance metric, and thus all distances is in km, + `max_distance` should also be provided in kilometer unit. + + When scaling with Dask, whether the function scales well depends on + the `max_distance` value. If `max_distance` is infinite by default, + this function only works on a single machine. + It should scale well, however, if `max_distance` is relatively small + compared to the maximum possible distance in two arbitrary points + in the input raster. Note that if `max_distance` is equal or larger + than the max possible distance between 2 arbitrary points in the input + raster, the input data array will be rechunked. + distance_metric: str, default='EUCLIDEAN' The metric for calculating distance between 2 points. Valid distance metrics are: @@ -702,9 +736,11 @@ def proximity( [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.] ]) + + n, m = data.shape raster = xr.DataArray(data, dims=['y', 'x'], name='raster') raster['y'] = np.arange(n)[::-1] - raster['x'] = np.arange(n) + raster['x'] = np.arange(m) proximity_agg = proximity(raster) @@ -728,7 +764,7 @@ def proximity( [3.1622777, 2.236068 , 1.4142135, 1. , 1.4142135], [3.6055512, 2.828427 , 2.236068 , 2. , 2.236068 ], [4.2426405, 3.6055512, 3.1622777, 3. , 3.1622777]], - dtype=float64) + dtype=float32) Coordinates: * y (y) int64 4 3 2 1 0 * x (x) int64 0 1 2 3 4 @@ -759,7 +795,7 @@ def allocation( x: str = "x", y: str = "y", target_values: list = [], - max_distance: float = None, + max_distance: float = np.inf, distance_metric: str = "EUCLIDEAN", ): """ @@ -790,20 +826,37 @@ def allocation( ---------- raster : xr.DataArray 2D array of target data. + x : str, default='x' Name of x-coordinates. + y : str, default='y' Name of y-coordinates. + target_values : list Target pixel values to measure the distance from. If this option is not provided, allocation will be computed from non-zero pixel values. - max_distance: float + + max_distance: float, default=np.inf The maximum distance to search. Proximity distances greater than - this value will be set to a NaN value. - Note that this must be given in the same unit as input. For example, - if input raster is in latitude/longitude units, `max_distance` should - also be provided in latitude/longitude units. + this value will be set to NaN. + Should be given in the same distance unit as input. + For example, if input raster is in lat-lon and distances between points + within the raster is calculated using Euclidean distance metric, + `max_distance` should also be provided in lat-lon unit. + If using Great Circle distance metric, and thus all distances is in km, + `max_distance` should also be provided in kilometer unit. + + When scaling with Dask, whether the function scales well depends on + the `max_distance` value. If `max_distance` is infinite by default, + this function only works on a single machine. + It should scale well, however, if `max_distance` is relatively small + compared to the maximum possible distance in two arbitrary points + in the input raster. Note that if `max_distance` is equal or larger + than the max possible distance between 2 arbitrary points in the input + raster, the input data array will be rechunked. + distance_metric : str, default='EUCLIDEAN' The metric for calculating distance between 2 points. Valid distance metrics are: 'EUCLIDEAN', 'GREAT_CIRCLE', and 'MANHATTAN'. @@ -834,9 +887,10 @@ def allocation( [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.] ]) + n, m = data.shape raster = xr.DataArray(data, dims=['y', 'x'], name='raster') raster['y'] = np.arange(n)[::-1] - raster['x'] = np.arange(n) + raster['x'] = np.arange(m) allocation_agg = allocation(raster) @@ -890,7 +944,7 @@ def direction( x: str = "x", y: str = "y", target_values: list = [], - max_distance: float = None, + max_distance: float = np.inf, distance_metric: str = "EUCLIDEAN", ): """ @@ -916,27 +970,45 @@ def direction( Similar to `proximity`, the implementation for NumPy-backed is ported from GDAL, which uses a dynamic programming approach to identify nearest target of a pixel from its surrounding neighborhood in a 3x3 window - The implementation for Dask-backed uses `sklearn.sklearn.neighbors.KDTree` - internally. + The implementation for Dask-backed uses `dask.map_overlap` to compute + proximity direction chunk by chunk by expanding the chunk's borders + to cover the `max_distance`. Parameters ---------- raster : xr.DataArray 2D array image with `raster.shape` = (height, width). + x : str, default='x' Name of x-coordinates. + y : str, default='y' Name of y-coordinates. + target_values: list Target pixel values to measure the distance from. If this option is not provided, proximity will be computed from non-zero pixel values. - max_distance: float + + max_distance: float, default=np.inf The maximum distance to search. Proximity distances greater than - this value will be set to a NaN value. - Note that this must be given in the same unit as input. For example, - if input raster is in latitude/longitude units, `max_distance` should - also be provided in latitude/longitude units. + this value will be set to NaN. + Should be given in the same distance unit as input. + For example, if input raster is in lat-lon and distances between points + within the raster is calculated using Euclidean distance metric, + `max_distance` should also be provided in lat-lon unit. + If using Great Circle distance metric, and thus all distances is in km, + `max_distance` should also be provided in kilometer unit. + + When scaling with Dask, whether the function scales well depends on + the `max_distance` value. If `max_distance` is infinite by default, + this function only works on a single machine. + It should scale well, however, if `max_distance` is relatively small + compared to the maximum possible distance in two arbitrary points + in the input raster. Note that if `max_distance` is equal or larger + than the max possible distance between 2 arbitrary points in the input + raster, the input data array will be rechunked. + distance_metric: str, default='EUCLIDEAN' The metric for calculating distance between 2 points. Valid distance_metrics are: @@ -968,9 +1040,10 @@ def direction( [0., 0., 0., 0., 0.], [1., 0., 0., 0., 0.] ]) + n, m = data.shape raster = xr.DataArray(data, dims=['y', 'x'], name='raster') raster['y'] = np.arange(n)[::-1] - raster['x'] = np.arange(n) + raster['x'] = np.arange(m) direction_agg = direction(raster) @@ -994,7 +1067,7 @@ def direction( [ 90. , 90. , 0. , 270. , 270. ], [360. , 135. , 180. , 225. , 243.43495 ], [ 0. , 270. , 180. , 206.56505 , 225. ]], - dtype=float64) + dtype=float32) Coordinates: * y (y) int64 4 3 2 1 0 * x (x) int64 0 1 2 3 4 diff --git a/xrspatial/tests/test_proximity.py b/xrspatial/tests/test_proximity.py index c806678c..cd398ea7 100755 --- a/xrspatial/tests/test_proximity.py +++ b/xrspatial/tests/test_proximity.py @@ -61,7 +61,7 @@ def test_proximity(): default_prox = proximity(raster_numpy, x='lon', y='lat') # output must be an xarray DataArray assert isinstance(default_prox, xr.DataArray) - assert type(default_prox.data[0][0]) == np.float64 + assert type(default_prox.data[0][0]) == np.float32 assert default_prox.shape == raster_numpy.shape # in this test case, where no polygon is completely inside another polygon, # number of non-zeros (target pixels) in original image @@ -84,7 +84,7 @@ def test_proximity(): target_values=target_values) # output must be an xarray DataArray assert isinstance(target_prox, xr.DataArray) - assert type(target_prox.data[0][0]) == np.float64 + assert type(target_prox.data[0][0]) == np.float32 assert target_prox.shape == raster_numpy.shape assert (len(np.where(raster_numpy.data == 2)[0]) + len(np.where(raster_numpy.data == 3)[0])) == \ @@ -104,7 +104,7 @@ def test_proximity(): distance_metric='MANHATTAN') # output must be an xarray DataArray assert isinstance(manhattan_prox, xr.DataArray) - assert type(manhattan_prox.data[0][0]) == np.float64 + assert type(manhattan_prox.data[0][0]) == np.float32 assert manhattan_prox.shape == raster_numpy.shape # all output values must be in range [0, max_possible_dist] max_possible_dist = manhattan_distance( @@ -128,7 +128,7 @@ def test_proximity(): distance_metric='GREAT_CIRCLE') # output must be an xarray DataArray assert isinstance(great_circle_prox, xr.DataArray) - assert type(great_circle_prox.data[0][0]) == np.float64 + assert type(great_circle_prox.data[0][0]) == np.float32 assert great_circle_prox.shape == raster_numpy.shape # all output values must be in range [0, max_possible_dist] max_possible_dist = great_circle_distance( @@ -203,7 +203,7 @@ def test_allocation(): # non-zero cells in raster are unique, thus len(px)=len(py)=1 d = euclidean_distance(xcoords[x], xcoords[px[0]], ycoords[y], ycoords[py[0]]) - assert proximity_agg.data[y, x] == d + assert proximity_agg.data[y, x] == np.float32(d) # dask case allocation_agg_dask = allocation(raster_dask, x='lon', y='lat') @@ -253,7 +253,7 @@ def test_direction(): # output must be an xarray DataArray assert isinstance(direction_agg, xr.DataArray) - assert type(direction_agg.data[0][0]) == np.float64 + assert type(direction_agg.data[0][0]) == np.float32 assert direction_agg.shape == raster_numpy.shape assert direction_agg.dims == raster_numpy.dims assert direction_agg.attrs == raster_numpy.attrs