|
| 1 | +# data_analysis |
| 2 | +数据分析 |
| 3 | + |
| 4 | +1)scripts/stats.py(数据分析脚本)</br> |
| 5 | +1.使用方法</br> |
| 6 | +python scripts/stats.py 输入dataframe所在csv 输出dataframe所在csv 待分析特征变量</br> |
| 7 | + |
| 8 | +2.使用例子</br> |
| 9 | +python scripts/stats.py 'd:\age.csv' 'd:\output.csv' 'age'</br> |
| 10 | + |
| 11 | +3.参数解释</br> |
| 12 | +输入dataframe</br> |
| 13 | +UserId age </br> |
| 14 | +0 27226838 22 </br> |
| 15 | +1 28297563 30 </br> |
| 16 | +2 27951286 27 </br> |
| 17 | +3 27865599 24 </br> |
| 18 | +4 2042801 27 </br> |
| 19 | +5 28624643 37 </br> |
| 20 | +6 28769069 28</br> |
| 21 | +... </br> |
| 22 | + |
| 23 | +输出dataframe(字段解释请移步wiki)</br> |
| 24 | +age,cnt_rec,cnt_target,%target,%cnt_rec,%cnt_target,%cum_cnt_rec,%cum_cnt_target,cnt_nontarget,%cnt_nontarget,%cum_nontarget,%cum_target-%cum_nontarget </br> |
| 25 | +18,110,9.0,8.18%,0.36%,0.53%,0.36%,0.53%,101.0,0.35%,0.35%,0.18%</br> |
| 26 | +19,479,22.0,4.59%,1.57%,1.30%,1.93%,1.84%,457.0,1.59%,1.94%,-0.28%</br> |
| 27 | +20,1006,45.0,4.47%,3.30%,2.67%,5.23%,4.50%,961.0,3.34%,5.27%,-0.67%</br> |
| 28 | +21,1765,77.0,4.36%,5.79%,4.56%,11.01%,9.06%,1688.0,5.86%,11.14%,-1.30%</br> |
| 29 | +22,1825,69.0,3.78%,5.98%,4.09%,17.00%,13.15%,1756.0,6.10%,17.23%,-2.01%</br> |
| 30 | +23,1865,82.0,4.40%,6.11%,4.86%,23.11%,18.01%,1783.0,6.19%,23.42%,-1.33%</br> |
| 31 | +24,1931,118.0,6.11%,6.33%,6.99%,29.44%,25.00%,1813.0,6.29%,29.72%,0.70%</br> |
| 32 | + |
| 33 | + |
| 34 | +2)scripts/woe.py(计算woe和iv脚本)</br> |
| 35 | +1.使用方法</br> |
| 36 | +python scripts/woe.py 输入dataframe所在csv 待分析特征变量 分段表达式(用逗号连接) y变量</br> |
| 37 | + |
| 38 | +2.使用例子</br> |
| 39 | +python scripts/woe.py "age.csv" "age" "20,30,45" "is_dft"</br> |
| 40 | + |
| 41 | +3.参数解释</br> |
| 42 | +输入dataframe</br> |
| 43 | +UserId age is_dft</br> |
| 44 | +0 27226838 22 1 </br> |
| 45 | +1 28297563 30 1</br> |
| 46 | +2 27951286 27 0</br> |
| 47 | +3 27865599 24 1</br> |
| 48 | +4 2042801 27 0</br> |
| 49 | +5 28624643 37 1</br> |
| 50 | +6 28769069 28 1</br> |
| 51 | +...</br> |
| 52 | + |
| 53 | +输出结果</br> |
| 54 | + class good bad %good %bad all woe iv</br> |
| 55 | +0 (0,20.0] 76 1519 4.76% 95.24% 1595 -6.34584 0.048765</br> |
| 56 | +1 (20,30] 895 17129 4.97% 95.03% 18024 -8.75549 0.561679</br> |
| 57 | +2 (30,45] 673 10021 6.29% 93.71% 10694 7.31007 0.372628</br> |
| 58 | +3 (45...) 75 869 7.94% 92.06% 944 2.57974 0.036832</br> |
| 59 | +4 NA 0 0 nan% nan% 0 NaN 0.000000</br> |
| 60 | + 1688 28819 5.53% 94.47% 30507 1.019905</br> |