Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb139576

Browse files
More experiments with generating input data for syntax tree parsers
1 parent2f0edae commitb139576

File tree

3 files changed

+240
-1
lines changed

3 files changed

+240
-1
lines changed

‎blobstamper/galley.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,19 @@ GalleySetBase::extract_internal(Blob &blob)
347347
return res;
348348
}
349349

350+
void
351+
GalleySetBase::LoadAll(Blob &blob)
352+
{
353+
std::vector<Blob> blobs =extract_internal(blob);
354+
for(int i=0; i<blobs.size(); i++)
355+
{
356+
Blob blob = blobs[i];
357+
StampBase & stamp = stamps[i];
358+
stamp.Load(blob);
359+
}
360+
}
361+
362+
350363
std::vector<std::string>
351364
GalleySetStr::ExtractStrSet(Blob &blob)
352365
{
@@ -356,7 +369,7 @@ GalleySetStr::ExtractStrSet(Blob &blob)
356369
{
357370
Blob blob = blobs[i];
358371
StampBaseStr & stamp = s_stamps[i];
359-
std::string str= stamp.ExtractStr(blob);
372+
std::string str= stamp.ExtractStr(blob);
360373
res.push_back(str);
361374
}
362375
return res;

‎blobstamper/galley.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class GalleySetBase : public GalleyBase
9999
public:
100100
GalleySetBase(std::vector<std::reference_wrapper<StampBase>> arg) : stamps(arg) {};
101101
std::vector<Blob>extract_internal(Blob &blob);
102+
voidLoadAll(Blob &blob);
102103

103104
intminSize()override;
104105
intmaxSize()override;

‎examples/exampleZZ.cpp

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
#include<stdio.h>
2+
#include<string.h>
3+
4+
#include<string>
5+
#include<iostream>
6+
#include<cstdlib>
7+
8+
#include<blobstamper/blobstamper.h>
9+
10+
/*
 * Shorthand for a vector of reference wrappers, spelled std::ref_vector<T>
 * throughout this example.
 *
 * NOTE(review): adding declarations to namespace std is undefined behaviour
 * according to the C++ standard; consider moving this alias to a project
 * namespace (all users of std::ref_vector would need to follow).
 */
namespace std
{
  template<typename T>
  using ref_vector = vector< reference_wrapper<T> >;
}
14+
15+
template<classStampT>classStampLottery:publicStampT
16+
{
17+
protected:
18+
std::ref_vector<StampT> stamps;
19+
int oracle_size;
20+
intinit_oracle_size(std::ref_vector<StampT> stamps_arg);
21+
22+
int stored_min;
23+
intinit_stored_min(std::ref_vector<StampT> stamps_arg);
24+
25+
public:
26+
StampLottery(std::ref_vector<StampT> stamps_arg): stamps(stamps_arg), oracle_size(init_oracle_size(stamps_arg)), stored_min(init_stored_min(stamps_arg)) {};
27+
StampLottery(): stored_min(-1) {};
28+
29+
virtualintminSize()override;
30+
virtualintmaxSize()override;
31+
virtual std::stringExtractStr(Blob &blob)override;
32+
voidAppend(StampT & stamp);
33+
};
34+
35+
36+
template<classStampT>int
37+
StampLottery<StampT>::
38+
init_stored_min(std::ref_vector<StampT> stamps_arg)
39+
{
40+
int min = std::numeric_limits<int>::max();
41+
42+
for(StampT & stamp : stamps)
43+
{
44+
45+
if (min > stamp.minSize())
46+
min = stamp.minSize();
47+
}
48+
return min;
49+
}
50+
51+
template<classStampT>int
52+
StampLottery<StampT>::init_oracle_size(std::ref_vector<StampT> stamps_arg)
53+
{
54+
unsignedlong size = stamps_arg.size();
55+
if (size < std::numeric_limits<unsignedchar>::max())
56+
return1;
57+
if (size < std::numeric_limits<unsignedshortint>::max())
58+
return2;
59+
if (size < std::numeric_limits<unsignedint>::max())
60+
return4;
61+
return8;
62+
}
63+
64+
65+
template<classStampT>int
66+
StampLottery<StampT>::minSize()
67+
{
68+
return stored_min + oracle_size;
69+
}
70+
71+
template<classStampT>int
72+
StampLottery<StampT>::maxSize()
73+
{
74+
return -1;// FIXME this is true only for recurion case. Should fix it somehow if Lottery is used in other cases
75+
}
76+
77+
78+
template<classStampT> std::string
79+
StampLottery<StampT>::ExtractStr(Blob &blob)
80+
{
81+
unsignedlong oracle;
82+
unsignedlong oracle_max;
83+
84+
switch (oracle_size)
85+
{
86+
case1:
87+
{
88+
StampArithm<unsignedchar> stamp;
89+
oracle = stamp.ExtractValue(blob);
90+
oracle_max = std::numeric_limits<unsignedchar>::max();
91+
break;
92+
}
93+
case2:
94+
{
95+
StampArithm<unsignedshort> stamp;
96+
oracle = stamp.ExtractValue(blob);
97+
oracle_max = std::numeric_limits<unsignedshort>::max();
98+
break;
99+
}
100+
case4:
101+
{
102+
StampArithm<unsignedint> stamp;
103+
oracle = stamp.ExtractValue(blob);
104+
oracle_max = std::numeric_limits<unsignedint>::max();
105+
break;
106+
}
107+
case8:
108+
{
109+
StampArithm<unsignedlong> stamp;
110+
oracle = stamp.ExtractValue(blob);
111+
oracle_max = std::numeric_limits<unsignedlong>::max();
112+
break;
113+
}
114+
default:
115+
abort();// Should never get here
116+
}
117+
118+
/* Actually we use only stamps that short enogh to consume blob's available data*/
119+
std::ref_vector<StampT> actual_stamps;
120+
for(StampT & stamp : stamps)
121+
{
122+
if(blob.Size() < stamp.minSize())// Skip all stamps that dose not fit
123+
continue;
124+
if ( stamp.isUnbounded() ||// Unbounded is always ok
125+
stamp.maxSize() > blob.Size() ||// Variated that can consume all data is ok
126+
stamp.minSize() *2 > blob.Size()// Fixed or variated stamp that lefts less data then it's min size will also do
127+
)
128+
{
129+
actual_stamps.push_back(stamp);
130+
}
131+
}
132+
if (actual_stamps.empty())
133+
{
134+
// Add just everything that fits
135+
for(StampT & stamp : stamps)
136+
{
137+
if(blob.Size() < stamp.minSize())// Skip all stamps that dose not fit
138+
continue;
139+
actual_stamps.push_back(stamp);
140+
}
141+
}
142+
143+
if (actual_stamps.empty())
144+
throwOutOfData();// This should not happen
145+
146+
longlongindex = ((double) oracle) / oracle_max * actual_stamps.size();
147+
if (index == actual_stamps.size())index--;/* If we hit the boundary step inside a bit*/
148+
149+
StampT& stamp = actual_stamps[index];
150+
return stamp.ExtractStr(blob);
151+
}
152+
153+
154+
template<classStampT>void
155+
StampLottery<StampT>::Append(StampT & stamp)
156+
{
157+
if (stamp.minSize()<stored_min)
158+
{
159+
stored_min = stamp.minSize();
160+
}
161+
stamps.push_back(stamp);
162+
oracle_size =init_oracle_size(stamps);
163+
}
164+
165+
166+
classBinaryOp:publicStampBaseStr,publicGalleySetBase
167+
{
168+
protected:
169+
std::string op_name;
170+
StampBaseStr &stamp1;
171+
StampBaseStr &stamp2;
172+
public:
173+
virtual std::stringExtractStr(Blob &blob)override;
174+
BinaryOp(std::string arg_op_name, StampBaseStr& arg_stamp1, StampBaseStr& arg_stamp2) :
175+
GalleySetBase({arg_stamp1, arg_stamp2}),
176+
op_name(arg_op_name),
177+
stamp1(arg_stamp1),
178+
stamp2(arg_stamp2) {};
179+
};
180+
181+
std::string
182+
BinaryOp::ExtractStr(Blob &blob)
183+
{
184+
std::vector<Blob> blobs =extract_internal(blob);
185+
return (std::string)"(" + stamp1.ExtractStr(blobs[0]) +""+ op_name +"" + stamp2.ExtractStr(blobs[1]) +")";
186+
}
187+
188+
/* Operator names; main() creates one BinaryOp stamp for each of them */
std::vector<std::string> ops{
  "+",
  "-",
  "*",
  "/",
  "^"
};
189+
190+
intmain()
191+
{
192+
// char data[] = "abcdef" "abcdef" "ABCDEF" "012345" "sdfaskdlfjalsfjdlasjfaksdjfgkwuergkwhfdaksjdfgaskuyrgfaelkrgfsaldjfgakyefgrkweugyfaksjskdfsd";
193+
194+
char data[] =
195+
"\x051\x04E\x05A\x018\x043\x00C\x039\x0DC\x069\x0AC\x009\x014\x05A\x0B2\x07F\x078\x021\x09F\x08B\x0B1\x07E\x060\x01F\x04A\x0D1\x071\x05C\x04F\x011\x0D0\x061\x0FB\x037\x077\x081\x00C\x059\x00A\x037\x02F\x061\x04A\x065\x06D"
196+
"\x003\x04A\x0BC\x099\x0F8\x00B\x0F7\x020\x0C9\x074\x065\x008\x0B4\x010\x008\x0B4\x08B\x070\x0E1\x0EF\x026\x04F\x0F9\x0AB\x01C\x06C\x035\x018\x086\x037\x0E7\x02F\x044\x057\x001\x020\x006\x0DD\x0C4\x059\x0D1\x0C5\x0A9\x005"
197+
"\x038\x078\x0E2\x053\x01D\x0F0\x06E\x0E6\x018\x0B6\x048\x0F1\x0DC\x061\x092\x0FB\x0D3\x010\x0B8\x042\x0CA\x0C1\x0E3\x075\x077\x099\x093\x0CC\x063\x0F0\x09E\x044\x03D\x070\x01A\x089\x035\x032\x04A\x0BD\x082\x0BF\x0EA\x002"
198+
"\x043\x071\x079\x0A0\x068\x0B3\x0D9\x029\x0E9\x045\x0A2\x027\x003\x02E\x0E2\x01F\x007\x0BD\x0CF\x00A\x03E\x00D\x044\x024\x0FA\x0DB\x03D\x033\x036\x011\x081\x070\x0B6\x04A\x083\x061\x05F\x0AE\x0F0\x0C5\x0A1\x010\x05B\x003"
199+
"\x061\x0C3\x0D2\x078\x0BD\x0F8\x0E1\x04B\x02F\x0D9\x093\x09F\x00E\x0D6\x03A\x070\x0F8\x052\x013\x0EE\x062\x0C0\x027\x0E5\x07B\x07B\x09E\x05D\x074\x068\x0C6\x0CD\x04E\x022\x03B\x04E\x0E7\x0E7\x0EE\x0EC\x015\x02C\x0FA\x050"
200+
"\x033\x042\x0E6\x0BF\x028\x002\x052\x096\x033\x057\x0D8\x082\x053\x06E\x0BD\x0C6\x0ED\x015\x036\x09E\x03B\x0BE\x0F3\x068\x0BD\x0EC\x0D3\x0E9\x023\x029\x081\x0CF\x0F8\x02D\x081\x049\x007\x0CC\x005\x004\x062\x040\x0E0\x0D0"
201+
"\x0CD\x062\x0D4\x09B\x007\x001\x037\x020\x059\x0AC\x0FC\x0A4\x095\x049\x05F\x04C\x0DA\x02B\x0E8\x0E9\x0BF\x029\x01F\x0D0\x06B\x06E\x0F5\x005\x075\x07B\x036\x0D2\x054\x078\x0D3\x059\x077\x09A\x0D5\x079\x0AC\x034\x030\x0FD"
202+
"\x006\x079\x022\x0F4\x0ED\x059\x080\x081\x08F\x0A6\x08F\x042\x08A\x0CC\x030\x019\x094\x0F3\x062\x00B\x08A\x0D4\x0F8\x0F3\x03B\x049\x0D1\x06D\x0C6\x067\x006\x0D3\x023\x035\x053\x0C1\x0F8\x068\x0EF\x0AD\x0C7\x053\x004\x02C"
203+
"\x092\x087\x075\x0B0\x0F0\x0F7\x0D9\x04C\x0C7\x0A2\x095\x02B\x038\x02E\x0F2\x005\x0BE\x0CD\x02E\x093\x08A\x088\x063\x07D\x0F1\x08A\x002\x0D0\x0B9\x05C\x008\x066\x002\x044\x0B0\x08F\x041\x009\x06F\x0E5\x08B\x068\x0EB\x05A";
204+
205+
Blobblob(data,strlen(data));
206+
207+
StampArithm<unsignedchar> stampс;
208+
209+
DictLCAlphaSmall dict;
210+
StampDictstamp_dict(dict);
211+
212+
StampLottery<StampBaseStr>stamp_lot({stampс, stamp_dict});
213+
214+
for(std::string op_name : ops)
215+
{
216+
BinaryOp *stamp_bi =newBinaryOp(op_name, stamp_lot, stamp_lot);
217+
stamp_lot.Append(*stamp_bi);
218+
}
219+
220+
for(int i=stamp_lot.minSize(); i<=strlen(data);i++)
221+
{
222+
Blobblob2(data, i);
223+
std::cout << i <<"" << stamp_lot.ExtractStr(blob2) <<"\n";
224+
}
225+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp