@@ -42,6 +42,9 @@ categories:
42
42
-category :" web-scraping"
43
43
title :" Web Scraping & Crawling"
44
44
subtitle :" Libraries for web scraping, crawling, downloading, and mining."
45
+ -category :" data-loading"
46
+ title :" Data Loading & Extraction"
47
+ subtitle :" Libraries for loading, collecting, and extracting data from a variety of data sources and formats."
45
48
-category :" monitoring"
46
49
title :" Monitoring"
47
50
-category :" web-ui"
@@ -2692,3 +2695,129 @@ projects:
2692
2695
-name :IP2Trace
2693
2696
github_id :ip2location/ip2trace-python
2694
2697
pypi_id :IP2Trace
2698
+ # Data Loading & Extraction
2699
+ -name :Datasets
2700
+ pypi_id :datasets
2701
+ github_id :huggingface/datasets
2702
+ category :data-loading
2703
+ -name :tabulator-py
2704
+ github_id :frictionlessdata/tabulator-py
2705
+ category :data-loading
2706
+ conda_id :conda-forge/tabulator-py
2707
+ pypi_id :tabulator
2708
+ -name :messytables
2709
+ github_id :okfn/messytables
2710
+ category :data-loading
2711
+ pypi_id :messytables
2712
+ -name :datatest
2713
+ github_id :shawnbrown/datatest
2714
+ category :data-loading
2715
+ pypi_id :datatest
2716
+ -name :rows
2717
+ github_id :turicas/rows
2718
+ category :data-loading
2719
+ pypi_id :rows
2720
+ -name :deepdish
2721
+ github_id :uchicago-cs/deepdish
2722
+ category :data-loading
2723
+ conda_id :conda-forge/deepdish
2724
+ pypi_id :deepdish
2725
+ -name :camelot
2726
+ github_id :camelot-dev/camelot
2727
+ category :data-loading
2728
+ pypi_id :camelot-py
2729
+ -name :excalibur
2730
+ github_id :camelot-dev/excalibur
2731
+ category :data-loading
2732
+ pypi_id :excalibur-py
2733
+ -name :xlwings
2734
+ github_id :xlwings/xlwings
2735
+ category :data-loading
2736
+ conda_id :conda-forge/xlwings
2737
+ pypi_id :xlwings
2738
+ -name :csvs-to-sqlite
2739
+ github_id :simonw/csvs-to-sqlite
2740
+ category :data-loading
2741
+ pypi_id :csvs-to-sqlite
2742
+ -name :Tablib
2743
+ pypi_id :tablib
2744
+ github_id :jazzband/tablib
2745
+ conda_id :conda-forge/tablib
2746
+ category :data-loading
2747
+ -name :python-magic
2748
+ pypi_id :python-magic
2749
+ github_id :ahupp/python-magic
2750
+ conda_id :conda-forge/python-magic
2751
+ category :data-loading
2752
+ -name :SDV
2753
+ pypi_id :sdv
2754
+ github_id :sdv-dev/SDV
2755
+ category :data-loading
2756
+ -name :Intake
2757
+ pypi_id :intake
2758
+ github_id :intake/intake
2759
+ conda_id :conda-forge/intake
2760
+ category :data-loading
2761
+ -name :csvkit
2762
+ pypi_id :csvkit
2763
+ github_id :wireservice/csvkit
2764
+ conda_id :conda-forge/csvkit
2765
+ category :data-loading
2766
+ -name :snorkel
2767
+ pypi_id :snorkel
2768
+ github_id :snorkel-team/snorkel
2769
+ conda_id :conda-forge/snorkel
2770
+ category :data-loading
2771
+ -name :pyexcel-xlsx
2772
+ pypi_id :pyexcel-xlsx
2773
+ github_id :pyexcel/pyexcel-xlsx
2774
+ conda_id :conda-forge/pyexcel-xlsx
2775
+ category :data-loading
2776
+ -name :Faker
2777
+ pypi_id :Faker
2778
+ github_id :joke2k/faker
2779
+ conda_id :conda-forge/faker
2780
+ category :data-loading
2781
+ -name :smart-open
2782
+ pypi_id :smart-open
2783
+ github_id :RaRe-Technologies/smart_open
2784
+ category :data-loading
2785
+ -name :pandas-datareader
2786
+ pypi_id :pandas-datareader
2787
+ github_id :pydata/pandas-datareader
2788
+ conda_id :conda-forge/pandas-datareader
2789
+ category :data-loading
2790
+ -name :openpyxl
2791
+ pypi_id :openpyxl
2792
+ gitlab_id :" https://foss.heptapod.net/api/graphql::openpyxl/openpyxl"
2793
+ conda_id :openpyxl
2794
+ dockerhub_id :" openpyxl/openpyxl-ci"
2795
+ docs_url :https://openpyxl.readthedocs.io/en/stable/
2796
+ license :MIT
2797
+ category :data-loading
2798
+ -name :textract
2799
+ pypi_id :textract
2800
+ github_id :deanmalmgren/textract
2801
+ conda_id :conda-forge/textract
2802
+ category :data-loading
2803
+ -name :PDFMiner
2804
+ pypi_id :pdfminer
2805
+ conda_id :conda-forge/pdfminer
2806
+ github_id :euske/pdfminer
2807
+ category :data-loading
2808
+ -name :xmltodict
2809
+ pypi_id :xmltodict
2810
+ github_id :martinblech/xmltodict
2811
+ conda_id :conda-forge/xmltodict
2812
+ category :data-loading
2813
+ -name :Singer
2814
+ pypi_id :singer-python
2815
+ github_id :singer-io/getting-started
2816
+ description :" Standard for moving data between databases, web APIs, files, queues, and just about anything else you can think of."
2817
+ license :AGPL-3.0
2818
+ category :data-loading
2819
+ -name :xlrd
2820
+ pypi_id :xlrd
2821
+ github_id :python-excel/xlrd
2822
+ conda_id :conda-forge/xlrd
2823
+ category :data-loading