Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1575a31

Browse files
committed
updated javadoc, changelog and readme
1 parent19b7b73 commit1575a31

File tree

4 files changed

+114
-60
lines changed

4 files changed

+114
-60
lines changed

‎CHANGELOG.md‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
#v0.2.2 | 2019-03-11
22
* introduced version number for file format
33
* changed replacement detection to use sorting instead of a map
4-
*mproved minifying of acceptors
4+
* improved minifying of acceptors
55
* bug fixes
66

77
#v0.2.1 | 2018-11-21
88
* optimized minifying and remapping
99
* added transducer methods
1010

1111
#v0.2.0 | 2018-06-15
12-
* moved char reading/writing method intoseparator class
12+
* moved char reading/writing method into separate class
1313
* improved memory footprint and execution time for creating CharAcceptors
1414
* fixed issue with serializing CharAcceptor
1515
* performance optimization of case-insensitive char comparison

‎README.md‎

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,7 @@
1-
#indoqa-fsa
1+
#indoqa-fsa
2+
3+
Provides an abstraction layer for acceptors and transducers from [Morfologik](https://github.com/morfologik/) as well as alternative implementations.
4+
5+
The abstraction layer handles the conversion between Strings and bytes, offers support for case-insensitive operations and easier construction of acceptors and transducers.
6+
7+
The alternative implementations work directly on characters, which results in better runtime behaviour and greatly reduced need for garbage collection.

‎src/main/java/com/indoqa/fsa/Acceptor.java‎

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,32 +20,137 @@
2020

2121
publicinterfaceAcceptor {
2222

23+
/**
24+
* Checks whether or not this {@link Acceptor} accepts the given <code>sequence</code>.
25+
*
26+
* @param sequence The {@link CharSequence} to check.
27+
* @return <code>true</code> if and only if this {@link Acceptor} accepts the given <code>sequence</code>
28+
*/
2329
booleanaccepts(CharSequencesequence);
2430

31+
/**
32+
* Performs the same operation as {@link #accepts(CharSequence)} but on the part of <code>sequence</code> denoted by
33+
* <code>start</code> and <code>length</code>.
34+
*
35+
* @see #accepts(CharSequence)
36+
*/
2537
booleanaccepts(CharSequencesequence,intstart,intlength);
2638

39+
/**
40+
* Find all accepted inputs at the beginning of given <code>charSequence</code>.<br/>
41+
* <br/>
42+
* Given the sequence <code>aa bbb cccc ddddd</code><br/>
43+
* and the accepted inputs <code>a</code>, <code>aa</code>, <code>aaa</code>, <code>b</code>, <code>bb</code>,
44+
* <code>bbb</code><br/>
45+
* the matches will be <code>a</code> and <code>aa</code>
46+
*
47+
* @param sequence The {@link CharSequence} to examine.
48+
* @return All accepted inputs at the beginning of the charSequence.
49+
*/
2750
String[]getAllMatches(CharSequencesequence);
2851

52+
/**
53+
* Performs the same operation as {@link #getAllMatches(CharSequence)} but on the part of <code>sequence</code> denoted by
54+
* <code>start</code> and <code>length</code>.
55+
*
56+
* @see #getAllMatches(CharSequence)
57+
*/
2958
String[]getAllMatches(CharSequencesequence,intstart,intlength);
3059

60+
/**
61+
* Find all accepted inputs in the given <code>charSequence</code>.<br/>
62+
* <p>
63+
* The only difference to {@link #getAllTokens(CharSequence)} is that the accepted input may occur at any position within the
64+
* <code>charSequence</code> (specifically start and end inside a token).
65+
* </p>
66+
*
67+
* @param sequence The {@link CharSequence} to examine.
68+
* @return all occurrences of accepted input
69+
*/
3170
List<Token>getAllOccurrences(CharSequencesequence);
3271

72+
/**
73+
* Performs the same operation as {@link #getAllOccurrences(CharSequence)} but on the part of <code>sequence</code> denoted by
74+
* <code>start</code> and <code>length</code>.
75+
*
76+
* @see #getAllOccurrences(CharSequence)
77+
*/
3378
List<Token>getAllOccurrences(CharSequencesequence,intstart,intlength);
3479

80+
/**
81+
* Find all accepted inputs that are tokens in the given <code>charSequence</code>.<br/>
82+
* <p>
83+
* A part of the given sequence is considered to be a <code>token</code>, when it starts and ends at a token boundary.<br/>
84+
* A token boundary is the change from a non-word character to a word character (or vice-versa), as well as the beginning and end
85+
* of the whole sequence.<br/>
86+
* Please note that a token may contain token boundaries.
87+
* </p>
88+
*
89+
* @param sequence The {@link CharSequence} to examine.
90+
* @return All tokens of accepted inputs.
91+
*/
3592
List<Token>getAllTokens(CharSequencesequence);
3693

94+
/**
95+
* Performs the same operation as {@link #getAllTokens(CharSequence)} but on the part of <code>sequence</code> denoted by
96+
* <code>start</code> and <code>length</code>.
97+
*
98+
* @see #getAllTokens(CharSequence)
99+
*/
37100
List<Token>getAllTokens(CharSequencesequence,intstart,intlength);
38101

102+
/**
103+
* Find the longest accepted input at the beginning of given <code>charSequence</code>.<br/>
104+
* <br/>
105+
* Given the sequence <code>aa bbb cccc ddddd</code><br/>
106+
* and the accepted inputs <code>a</code>, <code>aa</code>, <code>aaa</code>, <code>b</code>, <code>bb</code>,
107+
* <code>bbb</code><br/>
108+
* the longest match will be <code>aa</code>
109+
*
110+
* @param sequence The {@link CharSequence} to examine.
111+
* @return The longest accepted input at the beginning of the charSequence.
112+
*/
39113
StringgetLongestMatch(CharSequencesequence);
40114

115+
/**
116+
* Performs the same operation as {@link #getLongestMatch(CharSequence)} but on the part of <code>sequence</code> denoted by
117+
* <code>start</code> and <code>length</code>.
118+
*
119+
* @see #getLongestMatch(CharSequence)
120+
*/
41121
StringgetLongestMatch(CharSequencesequence,intstart,intlength);
42122

123+
/**
124+
* Performs {@link #getAllOccurrences(CharSequence)} and then eliminates overlapping {@link Token Tokens} by only keeping the
125+
* longest.
126+
*
127+
* @param sequence The {@link CharSequence} to examine.
128+
* @return The longest occurrences of accepted input.
129+
*/
43130
List<Token>getLongestOccurrences(CharSequencesequence);
44131

132+
/**
133+
* Performs the same operation as {@link #getLongestOccurrences(CharSequence)} but on the part of <code>sequence</code> denoted by
134+
* <code>start</code> and <code>length</code>.
135+
*
136+
* @see #getLongestOccurrences(CharSequence)
137+
*/
45138
List<Token>getLongestOccurrences(CharSequencesequence,intstart,intlength);
46139

140+
/**
141+
* Performs {@link #getAllTokens(CharSequence)} and then eliminates overlapping {@link Token Tokens} by only keeping the longest.
142+
*
143+
* @param sequence The {@link CharSequence} to examine.
144+
* @return The longest tokens of accepted input.
145+
*/
47146
List<Token>getLongestTokens(CharSequencesequence);
48147

148+
/**
149+
* Performs the same operation as {@link #getLongestTokens(CharSequence)} but on the part of <code>sequence</code> denoted by
150+
* <code>start</code> and <code>length</code>.
151+
*
152+
* @see #getLongestTokens(CharSequence)
153+
*/
49154
List<Token>getLongestTokens(CharSequencesequence,intstart,intlength);
50155

51156
}

‎src/main/java/com/indoqa/fsa/morfologik/MorfologikAcceptor.java‎

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,6 @@ public boolean accepts(CharSequence sequence, int start, int length) {
6060
returnthis.accepts(sequence.subSequence(start,start +length));
6161
}
6262

63-
/**
64-
* Find the all accepted inputs at the beginning of given <code>charSequence</code>.<br/>
65-
* <br/>
66-
* Given the sequence <code>aa bbb cccc ddddd</code><br/>
67-
* and the accepted inputs <code>a</code>, <code>aa</code>, <code>aaa</code>, <code>b</code>, <code>bb</code>,
68-
* <code>bbb</code><br/>
69-
* the matches will be <code>a</code> and <code>aa</code>
70-
*
71-
* @param charSequence The {@link CharSequence} to examine.
72-
* @return All accepted inputs at the beginning of the charSequence.
73-
*/
7463
@Override
7564
publicString[]getAllMatches(CharSequencesequence) {
7665
byte[]bytes =this.getBytes(sequence);
@@ -89,16 +78,6 @@ public String[] getAllMatches(CharSequence sequence, int start, int length) {
8978
returnthis.getAllMatches(sequence.subSequence(start,start +length));
9079
}
9180

92-
/**
93-
* Find all accepted inputs in the given <code>charSequence</code>.<br/>
94-
* <p>
95-
* The only difference to {@link #getAllTokens(CharSequence)} is that the accepted input may occur at any position within the
96-
* <code>charSequence</code> (specifically start and end inside a token).
97-
* </p>
98-
*
99-
* @param charSequence The {@link CharSequence} to examine.
100-
* @return all occurrences of accepted input
101-
*/
10281
@Override
10382
publicList<Token>getAllOccurrences(CharSequencecharSequence) {
10483
List<Token>result =newArrayList<>();
@@ -124,18 +103,6 @@ public List<Token> getAllOccurrences(CharSequence sequence, int start, int lengt
124103
returnthis.getAllOccurrences(sequence.subSequence(start,start +length));
125104
}
126105

127-
/**
128-
* Find all accepted inputs that are tokens in the given <code>charSequence</code>.<br/>
129-
* <p>
130-
* A part of the given sequence is considered to be a <code>token</code>, when it starts and ends at a token boundary.<br/>
131-
* A token boundary is the change from a non-word character to a word character (or vice-versa), as well as the beginning and end
132-
* of the whole sequence.<br/>
133-
* Please note that a token may contain token boundaries.
134-
* </p>
135-
*
136-
* @param charSequence The {@link CharSequence} to examine.
137-
* @return All tokens of accepted inputs.
138-
*/
139106
@Override
140107
publicList<Token>getAllTokens(CharSequencecharSequence) {
141108
List<Token>result =newArrayList<>();
@@ -168,17 +135,6 @@ public List<Token> getAllTokens(CharSequence sequence, int start, int length) {
168135
returnthis.getAllTokens(sequence.subSequence(start,start +length));
169136
}
170137

171-
/**
172-
* Find the longest accepted input at the beginning of given <code>charSequence</code>.<br/>
173-
* <br/>
174-
* Given the sequence <code>aa bbb cccc ddddd</code><br/>
175-
* and the accepted inputs <code>a</code>, <code>aa</code>, <code>aaa</code>, <code>b</code>, <code>bb</code>,
176-
* <code>bbb</code><br/>
177-
* the longest match will be <code>aa</code>
178-
*
179-
* @param charSequence The charSequence to examine.
180-
* @return The longest accepted input at the beginning of the charSequence.
181-
*/
182138
@Override
183139
publicStringgetLongestMatch(CharSequencecharSequence) {
184140
byte[]bytes =this.getBytes(charSequence);
@@ -196,13 +152,6 @@ public String getLongestMatch(CharSequence sequence, int start, int length) {
196152
returnthis.getLongestMatch(sequence.subSequence(start,start +length));
197153
}
198154

199-
/**
200-
* Performs {@link #getAllOccurrences(CharSequence)} and then eliminates overlapping {@link Token Tokens} by only keeping the
201-
* longest.
202-
*
203-
* @param charSequence The {@link CharSequence} to examine.
204-
* @return The longest occurrences of accepted input.
205-
*/
206155
@Override
207156
publicList<Token>getLongestOccurrences(CharSequencecharSequence) {
208157
returneliminateOverlapping(this.getAllOccurrences(charSequence));
@@ -213,12 +162,6 @@ public List<Token> getLongestOccurrences(CharSequence sequence, int start, int l
213162
returnthis.getLongestOccurrences(sequence.subSequence(start,start +length));
214163
}
215164

216-
/**
217-
* Performs {@link #getAllTokens(CharSequence)} and then eliminates overlapping {@link Token Tokens} by only keeping the longest.
218-
*
219-
* @param charSequence The {@link CharSequence} to examine.
220-
* @return The longest tokens of accepted input.
221-
*/
222165
@Override
223166
publicList<Token>getLongestTokens(CharSequencecharSequence) {
224167
returneliminateOverlapping(this.getAllTokens(charSequence));

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp