Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit03d037a

Browse files
committed
add initial fuzz test for partial_ratio
1 parentc6a3ac8 commit03d037a

File tree

5 files changed

+163
-0
lines changed

5 files changed

+163
-0
lines changed

‎fuzzing/CMakeLists.txt‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,5 @@ create_fuzzer(osa_distance)
2020
create_fuzzer(damerau_levenshtein_distance)
2121

2222
create_fuzzer(jaro_similarity)
23+
24+
create_fuzzer(partial_ratio)

‎fuzzing/fuzz_partial_ratio.cpp‎

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/* SPDX-License-Identifier: MIT*/
2+
/* Copyright © 2021 Max Bachmann*/
3+
4+
#include"../rapidfuzz_reference/fuzz.hpp"
5+
#include"fuzzing.hpp"
6+
#include<cstdint>
7+
#include<rapidfuzz/fuzz.hpp>
8+
#include<stdexcept>
9+
#include<string>
10+
11+
boolis_close(double a,double b,double epsilon)
12+
{
13+
returnfabs(a - b) <= ((fabs(a) <fabs(b) ?fabs(b) :fabs(a)) * epsilon);
14+
}
15+
16+
voidvalidate_distance(const std::vector<uint8_t>& s1,const std::vector<uint8_t>& s2)
17+
{
18+
auto sim =rapidfuzz::fuzz::partial_ratio(s1, s2);
19+
auto reference_sim =rapidfuzz_reference::partial_ratio(s1, s2);
20+
if (!is_close(sim, reference_sim,0.0001)) {
21+
print_seq("s1:", s1);
22+
print_seq("s2:", s2);
23+
throwstd::logic_error(std::string("partial_ratio failed (reference_score =") +
24+
std::to_string(reference_sim) +std::string(", score =") +
25+
std::to_string(sim) +")");
26+
}
27+
}
28+
29+
extern"C"intLLVMFuzzerTestOneInput(constuint8_t* data,size_t size)
30+
{
31+
std::vector<uint8_t> s1, s2;
32+
if (!extract_strings(data, size, s1, s2))return0;
33+
34+
validate_distance(s1, s2);
35+
validate_distance(s2, s1);
36+
37+
/* test long sequences*/
38+
for (unsignedint i =2; i <9; ++i) {
39+
std::vector<uint8_t> s1_ =vec_multiply(s1, pow<size_t>(2, i));
40+
std::vector<uint8_t> s2_ =vec_multiply(s2, pow<size_t>(2, i));
41+
42+
if (s1_.size() >10000 || s2_.size() >10000)break;
43+
44+
validate_distance(s1_, s2_);
45+
validate_distance(s2_, s1_);
46+
validate_distance(s1, s2_);
47+
validate_distance(s2_, s1);
48+
validate_distance(s1_, s2);
49+
validate_distance(s2, s1_);
50+
}
51+
52+
return0;
53+
}

‎rapidfuzz_reference/Indel.hpp‎

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,17 @@ size_t indel_distance(const Sentence1& s1, const Sentence2& s2,
2222
returnlevenshtein_distance(s1, s2, {1,1,2}, score_cutoff);
2323
}
2424

25+
template<typename InputIt1,typename InputIt2>
26+
doubleindel_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
27+
double score_cutoff =0.0)
28+
{
29+
returnlevenshtein_similarity(first1, last1, first2, last2, {1,1,2}, score_cutoff);
30+
}
31+
32+
template<typename Sentence1,typename Sentence2>
33+
doubleindel_similarity(const Sentence1& s1,const Sentence2& s2,double score_cutoff =0.0)
34+
{
35+
returnlevenshtein_similarity(s1, s2, {1,1,2}, score_cutoff);
36+
}
37+
2538
}// namespace rapidfuzz_reference

‎rapidfuzz_reference/Levenshtein.hpp‎

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,18 @@ struct LevenshteinWeightTable {
1616
size_t replace_cost;
1717
};
1818

19+
staticinlinesize_tlevenshtein_maximum(size_t len1,size_t len2, LevenshteinWeightTable weights)
20+
{
21+
size_t max_dist = len1 * weights.delete_cost + len2 * weights.insert_cost;
22+
23+
if (len1 >= len2)
24+
max_dist =std::min(max_dist, len2 * weights.replace_cost + (len1 - len2) * weights.delete_cost);
25+
else
26+
max_dist =std::min(max_dist, len1 * weights.replace_cost + (len2 - len1) * weights.insert_cost);
27+
28+
return max_dist;
29+
}
30+
1931
template<typename InputIt1,typename InputIt2>
2032
Matrix<size_t>levenshtein_matrix(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
2133
LevenshteinWeightTable weights = {1,1,1})
@@ -69,4 +81,24 @@ size_t levenshtein_distance(const Sentence1& s1, const Sentence2& s2,
6981
score_cutoff);
7082
}
7183

84+
template<typename InputIt1,typename InputIt2>
85+
doublelevenshtein_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
86+
LevenshteinWeightTable weights = {1,1,1},double score_cutoff =0.0)
87+
{
88+
size_t len1 =static_cast<size_t>(std::distance(first1, last1));
89+
size_t len2 =static_cast<size_t>(std::distance(first2, last2));
90+
size_t dist =levenshtein_distance(first1, last1, first2, last2, weights);
91+
size_t max =levenshtein_maximum(len1, len2, weights);
92+
double sim =1.0 - (double)dist / max;
93+
return (sim >= score_cutoff) ? sim :0.0;
94+
}
95+
96+
template<typename Sentence1,typename Sentence2>
97+
doublelevenshtein_similarity(const Sentence1& s1,const Sentence2& s2,
98+
LevenshteinWeightTable weights = {1,1,1},double score_cutoff =0.0)
99+
{
100+
returnlevenshtein_similarity(std::begin(s1),std::end(s1),std::begin(s2),std::end(s2), weights,
101+
score_cutoff);
102+
}
103+
72104
}// namespace rapidfuzz_reference

‎rapidfuzz_reference/fuzz.hpp‎

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/* SPDX-License-Identifier: MIT*/
2+
/* Copyright © 2022-present Max Bachmann*/
3+
4+
#pragma once
5+
6+
#include"Indel.hpp"
7+
8+
namespacerapidfuzz_reference {
9+
10+
template<typename InputIt1,typename InputIt2>
11+
doubleratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,double score_cutoff =0.0)
12+
{
13+
returnindel_similarity(first1, last1, first2, last2, score_cutoff /100.0) *100;
14+
}
15+
16+
template<typename Sentence1,typename Sentence2>
17+
doubleratio(const Sentence1& s1,const Sentence2& s2,double score_cutoff =0.0)
18+
{
19+
returnindel_similarity(s1, s2, score_cutoff /100.0) *100;
20+
}
21+
22+
template<typename InputIt1,typename InputIt2>
23+
doublepartial_ratio_impl(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
24+
double score_cutoff =0.0)
25+
{
26+
size_t len1 =static_cast<size_t>(std::distance(first1, last1));
27+
size_t len2 =static_cast<size_t>(std::distance(first2, last2));
28+
if (len1 ==0 && len2 ==0)return100.0;
29+
30+
if (len1 ==0 || len2 ==0)return0.0;
31+
32+
if (len1 > len2)returnpartial_ratio_impl(first2, last2, first1, last1, score_cutoff);
33+
34+
double res =0.0;
35+
for (ptrdiff_t i = -1 * (ptrdiff_t)len1; i < (ptrdiff_t)len2; i++) {
36+
ptrdiff_t start =std::max(ptrdiff_t(0), i);
37+
ptrdiff_t end =std::min(ptrdiff_t(len2), i +ptrdiff_t(len1));
38+
InputIt2 first2_ = first2 + start;
39+
InputIt2 last2_ = first2 + end;
40+
res =std::max(res,ratio(first1, last1, first2_, last2_, score_cutoff));
41+
}
42+
return res;
43+
}
44+
45+
template<typename InputIt1,typename InputIt2>
46+
doublepartial_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
47+
double score_cutoff =0.0)
48+
{
49+
size_t len1 =static_cast<size_t>(std::distance(first1, last1));
50+
size_t len2 =static_cast<size_t>(std::distance(first2, last2));
51+
if (len1 != len2)returnpartial_ratio_impl(first1, last1, first2, last2, score_cutoff);
52+
53+
returnstd::max(partial_ratio_impl(first1, last1, first2, last2, score_cutoff),
54+
partial_ratio_impl(first2, last2, first1, last1, score_cutoff));
55+
}
56+
57+
template<typename Sentence1,typename Sentence2>
58+
doublepartial_ratio(const Sentence1& s1,const Sentence2& s2,double score_cutoff =0.0)
59+
{
60+
returnpartial_ratio(std::begin(s1),std::end(s1),std::begin(s2),std::end(s2), score_cutoff);
61+
}
62+
63+
}// namespace rapidfuzz_reference

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp