Skip to content

Commit 5684705

Browse files
committed
properly handle score_cutoff > 1.0
1 parent faa0687 commit 5684705

File tree

7 files changed

+31
-13
lines changed

7 files changed

+31
-13
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
## Changelog
22

3+
### [1.11.2] - 2023-04-17
4+
#### Fixed
5+
- fix handling of `score_cutoff > 1.0` in `Jaro` and `JaroWinkler`
6+
37
### [1.11.1] - 2023-04-16
48
#### Fixed
59
- fix division by zero in the SIMD implementation of normalized string metrics when comparing empty strings

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ if (CMAKE_BINARY_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
2525
message(FATAL_ERROR "Building in-source is not supported! Create a build dir and remove ${CMAKE_SOURCE_DIR}/CMakeCache.txt")
2626
endif()
2727

28-
project(rapidfuzz LANGUAGES CXX VERSION 1.11.1)
28+
project(rapidfuzz LANGUAGES CXX VERSION 1.11.2)
2929

3030
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
3131
include(GNUInstallDirs)

extras/rapidfuzz_amalgamated.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
22
// SPDX-License-Identifier: MIT
33
// RapidFuzz v1.0.2
4-
// Generated: 2023-04-17 01:55:45.256062
4+
// Generated: 2023-04-17 13:47:21.759334
55
// ----------------------------------------------------------
66
// This file is an amalgamation of multiple different files.
77
// You probably shouldn't edit it directly.
@@ -5203,6 +5203,8 @@ double jaro_similarity(Range<InputIt1> P, Range<InputIt2> T, double score_cutoff
52035203
int64_t P_len = P.size();
52045204
int64_t T_len = T.size();
52055205

5206+
if (score_cutoff > 1.0) return 0.0;
5207+
52065208
if (!P_len && !T_len) return 1.0;
52075209

52085210
/* filter out based on the length difference between the two strings */
@@ -5250,6 +5252,8 @@ double jaro_similarity(const BlockPatternMatchVector& PM, Range<InputIt1> P, Ran
52505252
int64_t P_len = P.size();
52515253
int64_t T_len = T.size();
52525254

5255+
if (score_cutoff > 1.0) return 0.0;
5256+
52535257
if (!P_len && !T_len) return 1.0;
52545258

52555259
/* filter out based on the length difference between the two strings */

rapidfuzz/distance/Jaro_impl.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,8 @@ double jaro_similarity(Range<InputIt1> P, Range<InputIt2> T, double score_cutoff
343343
int64_t P_len = P.size();
344344
int64_t T_len = T.size();
345345

346+
if (score_cutoff > 1.0) return 0.0;
347+
346348
if (!P_len && !T_len) return 1.0;
347349

348350
/* filter out based on the length difference between the two strings */
@@ -390,6 +392,8 @@ double jaro_similarity(const BlockPatternMatchVector& PM, Range<InputIt1> P, Ran
390392
int64_t P_len = P.size();
391393
int64_t T_len = T.size();
392394

395+
if (score_cutoff > 1.0) return 0.0;
396+
393397
if (!P_len && !T_len) return 1.0;
394398

395399
/* filter out based on the length difference between the two strings */

rapidfuzz_reference/Jaro.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ double jaro_similarity(InputIt1 P_first, InputIt1 P_last, InputIt2 T_first, Inpu
2020
size_t P_len = static_cast<size_t>(std::distance(P_first, P_last));
2121
size_t T_len = static_cast<size_t>(std::distance(T_first, T_last));
2222

23-
if (!P_len || !T_len) return 1.0;
23+
if (score_cutoff > 1.0) return 0.0;
24+
25+
if (!P_len || !T_len) return double(!P_len && !T_len);
2426

2527
std::vector<int> P_flag(P_len + 1);
2628
std::vector<int> T_flag(T_len + 1);

test/distance/tests-Jaro.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cu
1616
rapidfuzz::CachedJaro scorer(s1);
1717
double res5 = scorer.similarity(s2, score_cutoff);
1818
double res6 = scorer.similarity(s2.begin(), s2.end(), score_cutoff);
19-
double res7 = scorer.similarity(s2, score_cutoff);
20-
double res8 = scorer.similarity(s2.begin(), s2.end(), score_cutoff);
19+
double res7 = scorer.normalized_similarity(s2, score_cutoff);
20+
double res8 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff);
2121
REQUIRE(res1 == Approx(res2));
2222
REQUIRE(res1 == Approx(res3));
2323
REQUIRE(res1 == Approx(res4));
@@ -39,8 +39,8 @@ double jaro_distance(const Sentence1& s1, const Sentence2& s2, double score_cuto
3939
rapidfuzz::CachedJaro scorer(s1);
4040
double res5 = scorer.distance(s2, score_cutoff);
4141
double res6 = scorer.distance(s2.begin(), s2.end(), score_cutoff);
42-
double res7 = scorer.distance(s2, score_cutoff);
43-
double res8 = scorer.distance(s2.begin(), s2.end(), score_cutoff);
42+
double res7 = scorer.normalized_distance(s2, score_cutoff);
43+
double res8 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff);
4444
REQUIRE(res1 == Approx(res2));
4545
REQUIRE(res1 == Approx(res3));
4646
REQUIRE(res1 == Approx(res4));
@@ -63,7 +63,9 @@ TEST_CASE("JaroWinklerTest")
6363

6464
SECTION("testFullResultWithScoreCutoff")
6565
{
66-
for (double score_cutoff = 0.0; score_cutoff < 1.1; score_cutoff += 0.1)
66+
auto score_cutoffs = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1};
67+
68+
for (double score_cutoff : score_cutoffs)
6769
for (const auto& name1 : names)
6870
for (const auto& name2 : names) {
6971
INFO("name1: " << name1 << ", name2: " << name2 << ", score_cutoff: " << score_cutoff);

test/distance/tests-JaroWinkler.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double
1818
rapidfuzz::CachedJaroWinkler scorer(s1, prefix_weight);
1919
double res5 = scorer.similarity(s2, score_cutoff);
2020
double res6 = scorer.similarity(s2.begin(), s2.end(), score_cutoff);
21-
double res7 = scorer.similarity(s2, score_cutoff);
22-
double res8 = scorer.similarity(s2.begin(), s2.end(), score_cutoff);
21+
double res7 = scorer.normalized_similarity(s2, score_cutoff);
22+
double res8 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff);
2323
REQUIRE(res1 == Approx(res2));
2424
REQUIRE(res1 == Approx(res3));
2525
REQUIRE(res1 == Approx(res4));
@@ -43,8 +43,8 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr
4343
rapidfuzz::CachedJaroWinkler scorer(s1, prefix_weight);
4444
double res5 = scorer.distance(s2, score_cutoff);
4545
double res6 = scorer.distance(s2.begin(), s2.end(), score_cutoff);
46-
double res7 = scorer.distance(s2, score_cutoff);
47-
double res8 = scorer.distance(s2.begin(), s2.end(), score_cutoff);
46+
double res7 = scorer.normalized_distance(s2, score_cutoff);
47+
double res8 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff);
4848
REQUIRE(res1 == Approx(res2));
4949
REQUIRE(res1 == Approx(res3));
5050
REQUIRE(res1 == Approx(res4));
@@ -67,7 +67,9 @@ TEST_CASE("JaroWinklerTest")
6767

6868
SECTION("testFullResultWithScoreCutoff")
6969
{
70-
for (double score_cutoff = 0.0; score_cutoff < 1.1; score_cutoff += 0.1)
70+
auto score_cutoffs = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1};
71+
72+
for (double score_cutoff : score_cutoffs)
7173
for (const auto& name1 : names)
7274
for (const auto& name2 : names) {
7375
INFO("name1: " << name1 << ", name2: " << name2 << ", score_cutoff: " << score_cutoff);

0 commit comments

Comments
 (0)