2424import java .io .Reader ;
2525import java .util .ArrayList ;
2626import java .util .List ;
27+ import java .util .Objects ;
2728import java .util .function .BiConsumer ;
2829import java .util .logging .Level ;
2930import java .util .logging .Logger ;
3839public final class UnifiedDiffReader {
3940
4041static final Pattern UNIFIED_DIFF_CHUNK_REGEXP =Pattern .compile ("^@@\\ s+-(?:(\\ d+)(?:,(\\ d+))?)\\ s+\\ +(?:(\\ d+)(?:,(\\ d+))?)\\ s+@@" );
42+ static final Pattern TIMESTAMP_REGEXP =Pattern .compile ("(\\ d{4}-\\ d{2}-\\ d{2}[T ]\\ d{2}:\\ d{2}:\\ d{2}\\ .\\ d{3,})" );
4143
4244private final InternalUnifiedDiffReader READER ;
4345private final UnifiedDiff data =new UnifiedDiff ();
44- private final UnifiedDiffLine [] MAIN_PARSER_RULES = new UnifiedDiffLine []{
45- new UnifiedDiffLine (true ,"^diff\\ s" ,this ::processDiff ),
46- new UnifiedDiffLine (true ,"^index\\ s[\\ da-zA-Z]+\\ .\\ .[\\ da-zA-Z]+(\\ s(\\ d+))?$" ,this ::processIndex ),
47- new UnifiedDiffLine (true ,"^---\\ s" ,this ::processFromFile ),
48- new UnifiedDiffLine (true ,"^\\ +\\ +\\ +\\ s" ,this ::processToFile ),
49- new UnifiedDiffLine ( false , UNIFIED_DIFF_CHUNK_REGEXP , this :: processChunk ),
50- new UnifiedDiffLine ("^ \\ s+" , this ::processNormalLine ),
51- new UnifiedDiffLine ("^- " ,this ::processDelLine ),
52- new UnifiedDiffLine ("^+ " ,this ::processAddLine )
53- } ;
46+
47+ private final UnifiedDiffLine DIFF_COMMAND = new UnifiedDiffLine (true ,"^diff\\ s" ,this ::processDiff );
48+ private final UnifiedDiffLine INDEX = new UnifiedDiffLine (true ,"^index\\ s[\\ da-zA-Z]+\\ .\\ .[\\ da-zA-Z]+(\\ s(\\ d+))?$" ,this ::processIndex );
49+ private final UnifiedDiffLine FROM_FILE = new UnifiedDiffLine (true ,"^---\\ s" ,this ::processFromFile );
50+ private final UnifiedDiffLine TO_FILE = new UnifiedDiffLine (true ,"^\\ +\\ +\\ +\\ s" ,this ::processToFile );
51+
52+ private final UnifiedDiffLine CHUNK = new UnifiedDiffLine (false , UNIFIED_DIFF_CHUNK_REGEXP , this ::processChunk );
53+ private final UnifiedDiffLine LINE_NORMAL = new UnifiedDiffLine ("^\\ s " ,this ::processNormalLine );
54+ private final UnifiedDiffLine LINE_DEL = new UnifiedDiffLine ("^- " ,this ::processDelLine );
55+ private final UnifiedDiffLine LINE_ADD = new UnifiedDiffLine ( "^ \\ +" , this :: processAddLine ) ;
5456
5557private UnifiedDiffFile actualFile ;
5658
@@ -63,36 +65,54 @@ public final class UnifiedDiffReader {
6365// [/^---\s/, from_file], [/^\+\+\+\s/, to_file], [/^@@\s+\-(\d+),?(\d+)?\s+\+(\d+),?(\d+)?\s@@/, chunk],
6466// [/^-/, del], [/^\+/, add], [/^\\ No newline at end of file$/, eof]];
6567private UnifiedDiff parse ()throws IOException ,UnifiedDiffParserException {
66- boolean header =true ;
6768String headerTxt ="" ;
68- String tailTxt ="" ;
69+ LOG .log (Level .INFO ,"header parsing" );
70+ String line =null ;
6971while (READER .ready ()) {
70- String line =READER .readLine ();
71- if (line .matches ("--\\ s*" )) {
72+ line =READER .readLine ();
73+ LOG .log (Level .INFO ,"parsing line {0}" ,line );
74+ if (DIFF_COMMAND .validLine (line ) ||INDEX .validLine (line )
75+ ||FROM_FILE .validLine (line ) ||TO_FILE .validLine (line )) {
7276break ;
7377 }else {
74- LOG . log ( Level . INFO , "parsing line{0}" , line ) ;
75- if ( processLine ( header , line ) == false ) {
76- if ( header ) {
77- headerTxt += line + " \n " ;
78- } else {
79- break ;
80- }
81- } else {
82- if ( header ) {
83- header = false ;
84- data . setHeader ( headerTxt );
78+ headerTxt += line + " \n " ;
79+ }
80+ }
81+ data . setHeader ( headerTxt ) ;
82+
83+ while ( line != null ) {
84+ if (! CHUNK . validLine ( line )) {
85+ initFileIfNecessary ();
86+ while (! CHUNK . validLine ( line ) ) {
87+ if ( processLine ( line , DIFF_COMMAND , INDEX , FROM_FILE , TO_FILE ) == false ) {
88+ throw new UnifiedDiffParserException ( "expected file start line not found" );
8589 }
90+ line =READER .readLine ();
91+ }
92+ }
93+ processLine (line ,CHUNK );
94+ while ((line =READER .readLine ()) !=null ) {
95+ if (processLine (line ,LINE_NORMAL ,LINE_ADD ,LINE_DEL ) ==false ) {
96+ throw new UnifiedDiffParserException ("expected data line not found" );
97+ }
98+ if (originalTxt .size () ==old_size &&revisedTxt .size () ==new_size ) {
99+ finalizeChunk ();
100+ break ;
86101 }
87102 }
103+ line =READER .readLine ();
104+ if (line ==null ||line .startsWith ("--" )) {
105+ break ;
106+ }
88107 }
89108
90- finalizeChunk ();
91-
92- while (READER .ready ()) {
93- tailTxt +=READER .readLine () +"\n " ;
109+ if (READER .ready ()) {
110+ String tailTxt ="" ;
111+ while (READER .ready ()) {
112+ tailTxt +=READER .readLine () +"\n " ;
113+ }
114+ data .setTailTxt (tailTxt );
94115 }
95- data .setTailTxt (tailTxt );
96116
97117return data ;
98118 }
@@ -112,32 +132,31 @@ public static UnifiedDiff parseUnifiedDiff(InputStream stream) throws IOExceptio
112132return parser .parse ();
113133 }
114134
115- private boolean processLine (boolean header ,String line )throws UnifiedDiffParserException {
116- for (UnifiedDiffLine rule :MAIN_PARSER_RULES ) {
117- if (header &&rule .isStopsHeaderParsing () || !header ) {
118- if (rule .processLine (line )) {
119- LOG .info (" >>> processed rule " +rule .toString ());
120- return true ;
121- }
135+ private boolean processLine (String line ,UnifiedDiffLine ...rules )throws UnifiedDiffParserException {
136+ for (UnifiedDiffLine rule :rules ) {
137+ if (rule .processLine (line )) {
138+ LOG .info (" >>> processed rule " +rule .toString ());
139+ return true ;
122140 }
123141 }
124142LOG .info (" >>> no rule matched " +line );
125143return false ;
144+ //throw new UnifiedDiffParserException("parsing error at line " + line);
126145 }
127146
128147private void initFileIfNecessary () {
129148if (!originalTxt .isEmpty () || !revisedTxt .isEmpty ()) {
130- finalizeChunk ();
131- actualFile =null ;
149+ throw new IllegalStateException ();
132150 }
151+ actualFile =null ;
133152if (actualFile ==null ) {
134153actualFile =new UnifiedDiffFile ();
135154data .addFile (actualFile );
136155 }
137156 }
138157
139158private void processDiff (MatchResult match ,String line ) {
140- initFileIfNecessary ();
159+ // initFileIfNecessary();
141160LOG .log (Level .INFO ,"start {0}" ,line );
142161String []fromTo =parseFileNames (READER .lastLine ());
143162actualFile .setFromFile (fromTo [0 ]);
@@ -148,7 +167,9 @@ private void processDiff(MatchResult match, String line) {
148167private List <String >originalTxt =new ArrayList <>();
149168private List <String >revisedTxt =new ArrayList <>();
150169private int old_ln ;
170+ private int old_size ;
151171private int new_ln ;
172+ private int new_size ;
152173
153174private void finalizeChunk () {
154175if (!originalTxt .isEmpty () || !revisedTxt .isEmpty ()) {
@@ -179,9 +200,11 @@ private void processDelLine(MatchResult match, String line) {
179200 }
180201
181202private void processChunk (MatchResult match ,String chunkStart ) {
182- finalizeChunk ();
183- old_ln =match .group (1 ) ==null ?1 :Integer .parseInt (match .group (1 ));
184- new_ln =match .group (3 ) ==null ?1 :Integer .parseInt (match .group (3 ));
203+ // finalizeChunk();
204+ old_ln =toInteger (match ,1 ,1 );
205+ old_size =toInteger (match ,2 ,0 );
206+ new_ln =toInteger (match ,3 ,1 );
207+ new_size =toInteger (match ,4 ,0 );
185208if (old_ln ==0 ) {
186209old_ln =1 ;
187210 }
@@ -190,27 +213,47 @@ private void processChunk(MatchResult match, String chunkStart) {
190213 }
191214 }
192215
216+ private static Integer toInteger (MatchResult match ,int group ,int defValue )throws NumberFormatException {
217+ return Integer .valueOf (Objects .toString (match .group (group ),"" +defValue ));
218+ }
219+
193220private void processIndex (MatchResult match ,String line ) {
194- initFileIfNecessary ();
221+ // initFileIfNecessary();
195222LOG .log (Level .INFO ,"index {0}" ,line );
196223actualFile .setIndex (line .substring (6 ));
197224 }
198225
199226private void processFromFile (MatchResult match ,String line ) {
200- initFileIfNecessary ();
227+ // initFileIfNecessary();
201228actualFile .setFromFile (extractFileName (line ));
229+ actualFile .setFromTimestamp (extractTimestamp (line ));
202230 }
203231
204232private void processToFile (MatchResult match ,String line ) {
205- initFileIfNecessary ();
233+ // initFileIfNecessary();
206234actualFile .setToFile (extractFileName (line ));
235+ actualFile .setToTimestamp (extractTimestamp (line ));
236+ }
237+
238+ private String extractFileName (String _line ) {
239+ Matcher matcher =TIMESTAMP_REGEXP .matcher (_line );
240+ String line =_line ;
241+ if (matcher .find ()) {
242+ line =line .substring (1 ,matcher .start ());
243+ }
244+ return line .substring (4 ).replaceFirst ("^(a|b)\\ /" ,"" )
245+ .replace (TIMESTAMP_REGEXP .toString (),"" ).trim ();
207246 }
208247
209- private String extractFileName (String line ) {
210- return line .substring (4 ).replaceFirst ("^(a|b)\\ /" ,"" );
248+ private String extractTimestamp (String line ) {
249+ Matcher matcher =TIMESTAMP_REGEXP .matcher (line );
250+ if (matcher .find ()) {
251+ return matcher .group ();
252+ }
253+ return null ;
211254 }
212255
213- class UnifiedDiffLine {
256+ final class UnifiedDiffLine {
214257
215258private final Pattern pattern ;
216259private final BiConsumer <MatchResult ,String >command ;
@@ -232,6 +275,11 @@ public UnifiedDiffLine(boolean stopsHeaderParsing, Pattern pattern, BiConsumer<M
232275this .stopsHeaderParsing =stopsHeaderParsing ;
233276 }
234277
278+ public boolean validLine (String line ) {
279+ Matcher m =pattern .matcher (line );
280+ return m .find ();
281+ }
282+
235283public boolean processLine (String line )throws UnifiedDiffParserException {
236284Matcher m =pattern .matcher (line );
237285if (m .find ()) {