|
53 | 53 | Apply a window along a given axis
|
54 | 54 | """
|
55 | 55 |
|
56 |
| -importcsv |
57 | 56 | importfunctools
|
58 | 57 | fromnumbersimportNumber
|
59 | 58 |
|
@@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning,
|
985 | 984 | returnCxy,f
|
986 | 985 |
|
987 | 986 |
|
988 |
| -def_csv2rec(fname,comments='#',skiprows=0,checkrows=0,delimiter=',', |
989 |
| -converterd=None,names=None,missing='',missingd=None, |
990 |
| -use_mrecords=False,dayfirst=False,yearfirst=False): |
991 |
| -""" |
992 |
| - Load data from comma/space/tab delimited file in *fname* into a |
993 |
| - numpy record array and return the record array. |
994 |
| -
|
995 |
| - If *names* is *None*, a header row is required to automatically |
996 |
| - assign the recarray names. The headers will be lower cased, |
997 |
| - spaces will be converted to underscores, and illegal attribute |
998 |
| - name characters removed. If *names* is not *None*, it is a |
999 |
| - sequence of names to use for the column names. In this case, it |
1000 |
| - is assumed there is no header row. |
1001 |
| -
|
1002 |
| -
|
1003 |
| - - *fname*: can be a filename or a file handle. Support for gzipped |
1004 |
| - files is automatic, if the filename ends in '.gz' |
1005 |
| -
|
1006 |
| - - *comments*: the character used to indicate the start of a comment |
1007 |
| - in the file, or *None* to switch off the removal of comments |
1008 |
| -
|
1009 |
| - - *skiprows*: is the number of rows from the top to skip |
1010 |
| -
|
1011 |
| - - *checkrows*: is the number of rows to check to validate the column |
1012 |
| - data type. When set to zero all rows are validated. |
1013 |
| -
|
1014 |
| - - *converterd*: if not *None*, is a dictionary mapping column number or |
1015 |
| - munged column name to a converter function. |
1016 |
| -
|
1017 |
| - - *names*: if not None, is a list of header names. In this case, no |
1018 |
| - header will be read from the file |
1019 |
| -
|
1020 |
| - - *missingd* is a dictionary mapping munged column names to field values |
1021 |
| - which signify that the field does not contain actual data and should |
1022 |
| - be masked, e.g., '0000-00-00' or 'unused' |
1023 |
| -
|
1024 |
| - - *missing*: a string whose value signals a missing field regardless of |
1025 |
| - the column it appears in |
1026 |
| -
|
1027 |
| - - *use_mrecords*: if True, return an mrecords.fromrecords record array if |
1028 |
| - any of the data are missing |
1029 |
| -
|
1030 |
| - - *dayfirst*: default is False so that MM-DD-YY has precedence over |
1031 |
| - DD-MM-YY. See |
1032 |
| - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 |
1033 |
| - for further information. |
1034 |
| -
|
1035 |
| - - *yearfirst*: default is False so that MM-DD-YY has precedence over |
1036 |
| - YY-MM-DD. See |
1037 |
| - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 |
1038 |
| - for further information. |
1039 |
| -
|
1040 |
| - If no rows are found, *None* is returned |
1041 |
| - """ |
1042 |
| - |
1043 |
| -ifconverterdisNone: |
1044 |
| -converterd=dict() |
1045 |
| - |
1046 |
| -ifmissingdisNone: |
1047 |
| -missingd= {} |
1048 |
| - |
1049 |
| -importdateutil.parser |
1050 |
| -importdatetime |
1051 |
| - |
1052 |
| -fh=cbook.to_filehandle(fname) |
1053 |
| - |
1054 |
| -delimiter=str(delimiter) |
1055 |
| - |
1056 |
| -classFH: |
1057 |
| -""" |
1058 |
| - For space-delimited files, we want different behavior than |
1059 |
| - comma or tab. Generally, we want multiple spaces to be |
1060 |
| - treated as a single separator, whereas with comma and tab we |
1061 |
| - want multiple commas to return multiple (empty) fields. The |
1062 |
| - join/strip trick below effects this. |
1063 |
| - """ |
1064 |
| -def__init__(self,fh): |
1065 |
| -self.fh=fh |
1066 |
| - |
1067 |
| -defclose(self): |
1068 |
| -self.fh.close() |
1069 |
| - |
1070 |
| -defseek(self,arg): |
1071 |
| -self.fh.seek(arg) |
1072 |
| - |
1073 |
| -deffix(self,s): |
1074 |
| -return' '.join(s.split()) |
1075 |
| - |
1076 |
| -def__next__(self): |
1077 |
| -returnself.fix(next(self.fh)) |
1078 |
| - |
1079 |
| -def__iter__(self): |
1080 |
| -forlineinself.fh: |
1081 |
| -yieldself.fix(line) |
1082 |
| - |
1083 |
| -ifdelimiter==' ': |
1084 |
| -fh=FH(fh) |
1085 |
| - |
1086 |
| -reader=csv.reader(fh,delimiter=delimiter) |
1087 |
| - |
1088 |
| -defprocess_skiprows(reader): |
1089 |
| -ifskiprows: |
1090 |
| -fori,rowinenumerate(reader): |
1091 |
| -ifi>= (skiprows-1): |
1092 |
| -break |
1093 |
| - |
1094 |
| -returnfh,reader |
1095 |
| - |
1096 |
| -process_skiprows(reader) |
1097 |
| - |
1098 |
| -defismissing(name,val): |
1099 |
| -"""Return whether the value val in column name should be masked.""" |
1100 |
| -returnval==missingorval==missingd.get(name)orval=='' |
1101 |
| - |
1102 |
| -defwith_default_value(func,default): |
1103 |
| -defnewfunc(name,val): |
1104 |
| -ifismissing(name,val): |
1105 |
| -returndefault |
1106 |
| -else: |
1107 |
| -returnfunc(val) |
1108 |
| -returnnewfunc |
1109 |
| - |
1110 |
| -defmybool(x): |
1111 |
| -ifx=='True': |
1112 |
| -returnTrue |
1113 |
| -elifx=='False': |
1114 |
| -returnFalse |
1115 |
| -else: |
1116 |
| -raiseValueError('invalid bool') |
1117 |
| - |
1118 |
| -dateparser=dateutil.parser.parse |
1119 |
| - |
1120 |
| -defmydateparser(x): |
1121 |
| -# try and return a datetime object |
1122 |
| -d=dateparser(x,dayfirst=dayfirst,yearfirst=yearfirst) |
1123 |
| -returnd |
1124 |
| - |
1125 |
| -mydateparser=with_default_value(mydateparser,datetime.datetime(1,1,1)) |
1126 |
| - |
1127 |
| -myfloat=with_default_value(float,np.nan) |
1128 |
| -myint=with_default_value(int,-1) |
1129 |
| -mystr=with_default_value(str,'') |
1130 |
| -mybool=with_default_value(mybool,None) |
1131 |
| - |
1132 |
| -defmydate(x): |
1133 |
| -# try and return a date object |
1134 |
| -d=dateparser(x,dayfirst=dayfirst,yearfirst=yearfirst) |
1135 |
| - |
1136 |
| -ifd.hour>0ord.minute>0ord.second>0: |
1137 |
| -raiseValueError('not a date') |
1138 |
| -returnd.date() |
1139 |
| -mydate=with_default_value(mydate,datetime.date(1,1,1)) |
1140 |
| - |
1141 |
| -defget_func(name,item,func): |
1142 |
| -# promote functions in this order |
1143 |
| -funcs= [mybool,myint,myfloat,mydate,mydateparser,mystr] |
1144 |
| -forfuncinfuncs[funcs.index(func):]: |
1145 |
| -try: |
1146 |
| -func(name,item) |
1147 |
| -exceptException: |
1148 |
| -continue |
1149 |
| -returnfunc |
1150 |
| -raiseValueError('Could not find a working conversion function') |
1151 |
| - |
1152 |
| -# map column names that clash with builtins -- TODO - extend this list |
1153 |
| -itemd= { |
1154 |
| -'return':'return_', |
1155 |
| -'file':'file_', |
1156 |
| -'print':'print_', |
1157 |
| - } |
1158 |
| - |
1159 |
| -defget_converters(reader,comments): |
1160 |
| - |
1161 |
| -converters=None |
1162 |
| -i=0 |
1163 |
| -forrowinreader: |
1164 |
| -if (len(row)andcommentsisnotNoneand |
1165 |
| -row[0].startswith(comments)): |
1166 |
| -continue |
1167 |
| -ifi==0: |
1168 |
| -converters= [mybool]*len(row) |
1169 |
| -ifcheckrowsandi>checkrows: |
1170 |
| -break |
1171 |
| -i+=1 |
1172 |
| - |
1173 |
| -forj, (name,item)inenumerate(zip(names,row)): |
1174 |
| -func=converterd.get(j) |
1175 |
| -iffuncisNone: |
1176 |
| -func=converterd.get(name) |
1177 |
| -iffuncisNone: |
1178 |
| -func=converters[j] |
1179 |
| -iflen(item.strip()): |
1180 |
| -func=get_func(name,item,func) |
1181 |
| -else: |
1182 |
| -# how should we handle custom converters and defaults? |
1183 |
| -func=with_default_value(func,None) |
1184 |
| -converters[j]=func |
1185 |
| -returnconverters |
1186 |
| - |
1187 |
| -# Get header and remove invalid characters |
1188 |
| -needheader=namesisNone |
1189 |
| - |
1190 |
| -ifneedheader: |
1191 |
| -forrowinreader: |
1192 |
| -if (len(row)andcommentsisnotNoneand |
1193 |
| -row[0].startswith(comments)): |
1194 |
| -continue |
1195 |
| -headers=row |
1196 |
| -break |
1197 |
| - |
1198 |
| -# remove these chars |
1199 |
| -delete=set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""") |
1200 |
| -delete.add('"') |
1201 |
| - |
1202 |
| -names= [] |
1203 |
| -seen=dict() |
1204 |
| -fori,iteminenumerate(headers): |
1205 |
| -item=item.strip().lower().replace(' ','_') |
1206 |
| -item=''.join([cforcinitemifcnotindelete]) |
1207 |
| -ifnotlen(item): |
1208 |
| -item='column%d'%i |
1209 |
| - |
1210 |
| -item=itemd.get(item,item) |
1211 |
| -cnt=seen.get(item,0) |
1212 |
| -ifcnt>0: |
1213 |
| -names.append(item+'_%d'%cnt) |
1214 |
| -else: |
1215 |
| -names.append(item) |
1216 |
| -seen[item]=cnt+1 |
1217 |
| - |
1218 |
| -else: |
1219 |
| -ifisinstance(names,str): |
1220 |
| -names= [n.strip()forninnames.split(',')] |
1221 |
| - |
1222 |
| -# get the converter functions by inspecting checkrows |
1223 |
| -converters=get_converters(reader,comments) |
1224 |
| -ifconvertersisNone: |
1225 |
| -raiseValueError('Could not find any valid data in CSV file') |
1226 |
| - |
1227 |
| -# reset the reader and start over |
1228 |
| -fh.seek(0) |
1229 |
| -reader=csv.reader(fh,delimiter=delimiter) |
1230 |
| -process_skiprows(reader) |
1231 |
| - |
1232 |
| -ifneedheader: |
1233 |
| -whileTrue: |
1234 |
| -# skip past any comments and consume one line of column header |
1235 |
| -row=next(reader) |
1236 |
| -if (len(row)andcommentsisnotNoneand |
1237 |
| -row[0].startswith(comments)): |
1238 |
| -continue |
1239 |
| -break |
1240 |
| - |
1241 |
| -# iterate over the remaining rows and convert the data to date |
1242 |
| -# objects, ints, or floats as appropriate |
1243 |
| -rows= [] |
1244 |
| -rowmasks= [] |
1245 |
| -fori,rowinenumerate(reader): |
1246 |
| -ifnotlen(row): |
1247 |
| -continue |
1248 |
| -ifcommentsisnotNoneandrow[0].startswith(comments): |
1249 |
| -continue |
1250 |
| -# Ensure that the row returned always has the same nr of elements |
1251 |
| -row.extend(['']* (len(converters)-len(row))) |
1252 |
| -rows.append([func(name,val) |
1253 |
| -forfunc,name,valinzip(converters,names,row)]) |
1254 |
| -rowmasks.append([ismissing(name,val) |
1255 |
| -forname,valinzip(names,row)]) |
1256 |
| -fh.close() |
1257 |
| - |
1258 |
| -ifnotlen(rows): |
1259 |
| -returnNone |
1260 |
| - |
1261 |
| -ifuse_mrecordsandnp.any(rowmasks): |
1262 |
| -r=np.ma.mrecords.fromrecords(rows,names=names,mask=rowmasks) |
1263 |
| -else: |
1264 |
| -r=np.rec.fromrecords(rows,names=names) |
1265 |
| -returnr |
1266 |
| - |
1267 |
| - |
1268 | 987 | classGaussianKDE:
|
1269 | 988 | """
|
1270 | 989 | Representation of a kernel-density estimate using Gaussian kernels.
|
|