Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 922a852

Browse files
committed
incomplete
1 parent 80600e0, commit 922a852

File tree

1 file changed

+286
-0
lines changed

1 file changed

+286
-0
lines changed

‎extract-content.js

Lines changed: 286 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,286 @@
1+
/**
 * Work-in-progress content extractor.
 *
 * At this stage the function only wires up its helper namespaces (`Util`,
 * `A`, `DOM`, `Heuristics`) and runs a small scaffold: it looks up the first
 * <h1> of the document twice, computes the common ancestor of the two
 * look-ups and returns it. `Heuristics` is defined but not instantiated here.
 *
 * Declared with `var` (the original assigned an undeclared identifier, which
 * throws in strict mode / ES modules); at script top level this still creates
 * the global `extractContent`.
 *
 * @param {Document} d  Document-like object; must expose `body` and
 *                      `getElementsByTagName`.
 * @returns {?Node} `null` when `d` has no body or no <h1>; otherwise the
 *                  result of `DOM.commonAncestor` for the first <h1>.
 */
var extractContent = function (d) {
    if (!d.body) return null;

    var Util = {
        /**
         * Fill in missing properties of `child` from `parent` (shallow).
         * `child` is mutated and returned; a new object is created when
         * `child` is falsy. Only properties that are `undefined` on `child`
         * are copied, so caller-supplied values win over defaults.
         */
        inherit: function (child, parent) {
            var obj = child || {};
            for (var prop in parent) {
                // The original wrote typeof(obj[prop] == 'undefined'), which
                // is the always-truthy string "boolean" and therefore
                // overwrote every caller-supplied value.
                if (typeof obj[prop] == 'undefined') {
                    obj[prop] = parent[prop];
                }
            }
            return obj;
        }
    };

    // Array helpers taking the array as their first argument. A native
    // static `Array.xxx` generic is preferred when the host provides one.
    // Optional trailing arguments live at index 2 (after `self` and
    // `elt`/`fun`); the original read index 1 and so picked up `elt`/`fun`.
    var A = {};

    /** Index of the first element strictly equal to `elt`, or -1. */
    A.indexOf = Array.indexOf || function (self, elt /*, from*/) {
        var len = self.length;
        var from = Number(arguments[2]) || 0;
        from = (from < 0) ? Math.ceil(from) : Math.floor(from);
        if (from < 0) from += len; // negative offsets count from the end
        for (; from < len; from++) {
            if (from in self && self[from] === elt) return from;
        }
        return -1;
    };

    /** New array holding the elements for which `fun` returns truthy. */
    A.filter = Array.filter || function (self, fun /*, thisp*/) {
        var len = self.length;
        if (typeof fun != 'function') {
            throw new TypeError('Array.prototype.filter: not a function');
        }
        var rv = [];
        var thisp = arguments[2];
        for (var i = 0; i < len; i++) {
            if (i in self) {
                var val = self[i]; // in case fun mutates this
                if (fun.call(thisp, val, i, self)) rv.push(val);
            }
        }
        return rv;
    };

    /** Call `fun(value, index, self)` for every present index. */
    A.forEach = Array.forEach || function (self, fun /*, thisp*/) {
        var len = self.length;
        if (typeof fun != 'function') {
            throw new TypeError('Array.prototype.forEach: not a function');
        }
        var thisp = arguments[2];
        for (var i = 0; i < len; i++) {
            if (i in self) fun.call(thisp, self[i], i, self);
        }
    };

    /** True when `fun` returns truthy for every present element. */
    A.every = Array.every || function (self, fun /*, thisp*/) {
        var len = self.length;
        if (typeof fun != 'function') {
            throw new TypeError('Array.prototype.every: not a function');
        }
        var thisp = arguments[2];
        for (var i = 0; i < len; i++) {
            if (i in self && !fun.call(thisp, self[i], i, self)) {
                return false;
            }
        }
        return true;
    };

    /** New array of the same length holding `fun`'s results. */
    A.map = Array.map || function (self, fun /*, thisp*/) {
        var len = self.length;
        if (typeof fun != 'function') {
            throw new TypeError('Array.prototype.map: not a function');
        }
        var rv = new Array(len);
        var thisp = arguments[2];
        for (var i = 0; i < len; i++) {
            if (i in self) {
                rv[i] = fun.call(thisp, self[i], i, self);
            }
        }
        return rv;
    };

    /** True when `fun` returns truthy for at least one present element. */
    A.some = Array.some || function (self, fun /*, thisp*/) {
        var len = self.length;
        if (typeof fun != 'function') {
            throw new TypeError('Array.prototype.some: not a function');
        }
        var thisp = arguments[2];
        for (var i = 0; i < len; i++) {
            if (i in self && fun.call(thisp, self[i], i, self)) {
                return true;
            }
        }
        return false;
    };

    /**
     * Left fold. Without an initial value the first present element seeds
     * the accumulator; throws TypeError when there is none.
     */
    A.reduce = Array.reduce || function (self, fun /*, initial*/) {
        var len = self.length;
        if (typeof fun != 'function') {
            throw new TypeError('Array.prototype.reduce: not a function ');
        }
        var i = 0;
        var rv;
        if (arguments.length >= 3) {
            rv = arguments[2];
        } else {
            // Seed with the first index that actually exists (sparse arrays).
            do {
                if (i in self) {
                    rv = self[i++];
                    break;
                }
                if (++i >= len) {
                    throw new TypeError('Array.prototype.reduce: empty array');
                }
            } while (true);
        }
        for (; i < len; i++) {
            if (i in self) rv = fun.call(null, rv, self[i], i, self);
        }
        return rv;
    };

    /** First element of `self`, or null when `self` is falsy. */
    A.first = function (self) {
        return self ? self[0] : null;
    };

    /** Last element of `self`, or null when `self` is falsy. */
    A.last = function (self) {
        return self ? self[self.length - 1] : null;
    };

    var DOM = {
        /**
         * Chain from `e` up to its document's body: [e .. document.body].
         * If `e` is not a descendant of the body the walk stops at the root
         * and the body is not appended (the original dereferenced null).
         */
        ancestors: function (e) {
            var body = e.ownerDocument.body;
            var r = [];
            var it = e;
            while (it && it !== body) {
                r.push(it);
                it = it.parentNode;
            }
            if (it === body) r.push(body);
            return r;
        },

        /**
         * Deepest node contained in both ancestor chains, or null when the
         * chains share no prefix.
         */
        commonAncestor: function (e1, e2) {
            // The original called Util.ancestors, which does not exist.
            var a1 = DOM.ancestors(e1).reverse();
            var a2 = DOM.ancestors(e2).reverse();
            var r = null;
            for (var i = 0; a1[i] && a2[i] && a1[i] == a2[i]; i++) {
                r = a1[i];
            }
            return r;
        }
    };

    // Placeholder; no behaviour yet.
    var LayeredExtractor = function () {
    };

    /**
     * Heuristic extractor factory.
     * arguments[0]: option overrides, arguments[1]: pattern overrides; both
     * are completed from the defaults below via Util.inherit (shallow).
     */
    var Heuristics = function (/*opt, pattern*/) {
        var self = {
            opt: Util.inherit(arguments[0], {
                threshold: 60,
                minLength: 30,
                factor: {
                    decay: 0.75,
                    noBody: 0.72,
                    continuous: 1.62
                },
                punctuationWeight: 10,
                minNoLink: 8,
                noListRatio: 0.2,
                debug: 0
            }),
            pat: Util.inherit(arguments[1], {
                sep: ['div', 'center', 'td'],
                waste: /Copyright|All\s*Rights?\s*Reserved?/i,
                affiliate: /amazon[a-z0-9\.\/\-\?&]+-22/i,
                noContent: ['frameset'],
                ignore: [
                    'script',
                    'style',
                    'select',
                    'noscript',
                    ['div', {
                        'id': ['more', 'menu', 'side', 'navi'],
                        'class': ['more', 'menu', 'side', 'navi']
                    }]
                ]
            })
        };

        // Block(parent, nodes) builds a block record; Block.split is attached
        // as a static helper through Util.inherit.
        var Block = Util.inherit(function (parent, nodes) {
            var block = { parent: parent, nodes: nodes };
            block.isLinkList = function () {
                /* TODO */
            };
            block.score = function (factor, continuous) {
                /* TODO */
            };
            // Placeholders for the members extract() already calls; the
            // original never defined them, so extract() threw a TypeError.
            block.isAccepted = function () {
                /* TODO */
            };
            block.merge = function (other) {
                /* TODO */
            };
            return block;
        }, {
            /**
             * Partition the children of `node` into blocks. A child whose tag
             * name is listed in `sep` closes the current run and is split
             * recursively; every other child joins the current run.
             */
            split: function (node, sep) {
                var r = [];
                var buf = [];
                var flush = function () {
                    if (buf.length) {
                        r.push(new Block(node, buf));
                        buf = [];
                    }
                };
                var children = node.childNodes;
                for (var i = 0, len = children.length; i < len; i++) {
                    var c = children[i];
                    // tagName is upper-case in HTML documents while `sep`
                    // holds lower-case names; non-element nodes have none.
                    var tag = c.tagName ? String(c.tagName).toLowerCase() : null;
                    var isSeparator = A.some(sep, function (v) {
                        return v === tag;
                    });
                    if (isSeparator) {
                        flush();
                        // Pass `sep` on; the original omitted it and the
                        // recursive call failed on `sep.length`.
                        var rec = Block.split(c, sep);
                        if (rec.length) {
                            Array.prototype.push.apply(r, rec);
                        }
                    } else {
                        buf.push(c);
                    }
                }
                flush();
                return r;
            }
        });

        /**
         * Incomplete scoring pass over the blocks of `d.body`. Returns
         * undefined; `result` is built but not yet returned.
         */
        self.extract = function (d) {
            if (A.some(self.pat.noContent, function (v) {
                return d.getElementsByTagName(v).length != 0;
            })) {
                return;
            }

            var factor = 1.0;
            var continuous = 1.0;
            var score = 0; // not used yet
            // eliminate_useless_symbols
            // eliminate_useless_tags

            var result = [];
            var blocks = Block.split(d.body, self.pat.sep);

            for (var i = 0, len = blocks.length; i < len; i++) {
                var block = blocks[i];
                // FIXME: the original tested an undeclared `body` here.
                // NOTE(review): "a block has already been collected" is used
                // as the closest available condition - confirm the intent.
                if (result.length) continuous /= self.opt.factor.continuous;

                // ignore link list block
                if (block.isLinkList()) continue;

                // score
                var c = block.score(factor, continuous);
                factor *= self.opt.factor.decay;

                // cluster scoring
                // NOTE(review): nothing sets `isContinuous` on a block yet.
                if (block.isContinuous) {
                    // FIXME: flag?
                    var last = A.last(result);
                    if (last) {
                        last.merge(block);
                    } else {
                        result.push(block);
                    }
                    continuous = self.opt.factor.continuous;
                } else if (block.isAccepted()) {
                    // FIXME: flag?
                } else { // rejected
                }
            }
        };

        return self;
    };

    // Scaffold: exercise DOM.commonAncestor on the first <h1>.
    // NOTE(review): e1 and e2 are the same look-up - confirm whether a second
    // element was intended.
    var e1 = d.getElementsByTagName('h1')[0];
    var e2 = d.getElementsByTagName('h1')[0];
    if (!e1 || !e2) return null; // nothing to compare
    var e = DOM.commonAncestor(e1, e2);
    // Debug output; skipped where the host has no alert().
    if (typeof alert == 'function') alert(e);

    // test
    // var e = d.createElement('a');
    // e.href = 'http://orezdnu.org/';
    // var text = d.createTextNode('orezdnu.org');
    // e.appendChild(text);
    return e;
};

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp