from fastcore.foundation import L
ez_kaggle.dataset
API details for using datasets to store competition related things (model weights, pip libraries, etc.)
Foundation
ds_exists
ds_exists (dataset_slug, path='.')
Check if a dataset exists
| | Type | Default | Details |
|---|---|---|---|
| dataset_slug | | | Dataset slug (ie “zillow/zecon”) |
| path | str | . | path to fastkaggle.json file or None |
assert ds_exists('isaacflath/library-fastkaggle')
assert not ds_exists('not/real/dataset')
mk_dataset
mk_dataset (dataset_path, title, force=False, upload=True, cfg_path='.', **kwargs)
Creates minimal dataset metadata needed to push new dataset to kaggle
| | Type | Default | Details |
|---|---|---|---|
| dataset_path | | | Local path to create dataset in |
| title | | | Name of the dataset |
| force | bool | False | Should it overwrite or error if exists? |
| upload | bool | True | Should it upload and create on kaggle |
| cfg_path | str | . | path to fastkaggle.json file or None |
| kwargs | | | |
mk_dataset('./testds','mytestds',force=True,upload=False)
path = Path('./testds/dataset-metadata.json')
md = json.load(open(path))
assert md['title'] == 'mytestds'
assert md['id'].endswith('/mytestds')
path.unlink()
path.parent.rmdir()
Data package template written to: testds/dataset-metadata.json
get_dataset
get_dataset (dataset_slug, dataset_path, unzip=True, force=False)
Downloads an existing dataset and metadata from kaggle
| | Type | Default | Details |
|---|---|---|---|
| dataset_slug | | | Dataset slug (ie “zillow/zecon”) |
| dataset_path | | | Local path to download dataset to |
| unzip | bool | True | Should it unzip after downloading? |
| force | bool | False | Should it overwrite or error if dataset_path exists? |
dataset_path = Path('./data-science-job-salaries')
get_dataset('ruchi798/data-science-job-salaries',dataset_path, force=True)
files = os.listdir(dataset_path)
assert L(files).sorted() == ['dataset-metadata.json', 'ds_salaries.csv']
for f in Path(dataset_path).ls(): f.unlink()
Path(dataset_path).rmdir()
push_dataset
push_dataset (dataset_path, version_comment, quiet=True)
Push dataset update to kaggle. Dataset path must contain dataset metadata file
| | Type | Default | Details |
|---|---|---|---|
| dataset_path | | | Local path where dataset is stored |
| version_comment | | | Comment associated with this dataset update |
| quiet | bool | True | |
Pip Libraries
get_pip_library
get_pip_library (pip_library, cfg_path='.', **kwargs)
Download the whl files for pip_library and store in dataset_path
| | Type | Default | Details |
|---|---|---|---|
| pip_library | | | name of library for pip to install |
| cfg_path | str | . | path to fastkaggle.json file or None |
| kwargs | | | |
lib = 'fastcore'
get_pip_library(lib)
assert Path(lib).exists()
Path(lib).ls().map(lambda x: x.unlink())
Path(lib).rmdir()
get_pip_libraries
get_pip_libraries (directory_name, cfg_path='.', **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| directory_name | | | |
| cfg_path | str | . | path to fastkaggle.json file or None |
| kwargs | | | |
directory_name = 'my-test-libs'
get_pip_libraries('my-test-libs')
assert Path(directory_name).exists()
Path(directory_name).ls().map(lambda x: x.unlink())
Path(directory_name).rmdir()
get_local_ds_ver
get_local_ds_ver (lib_path, lib)
checks a local copy of kaggle dataset for library version number
| | Details |
|---|---|
| lib_path | Local path dataset is stored in |
| lib | Name of library (ie “fastcore”) |
create_dependency_dataset
create_dependency_dataset (version_notes='New Update', cfg_path='.', **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| version_notes | str | New Update | |
| cfg_path | str | . | path to fastkaggle.json file or None |
| kwargs | | | |
create_dependency_dataset()
path = Path('libraries-titanic')
assert path.exists()
assert ds_exists('isaacflath/libraries-titanic')
ds_exists('isaacflath/libraries-titanic')
Path(path).ls().map(lambda x: x.unlink())
Path(path).rmdir()
-----Downloading or Creating Dataset if needed
-----Checking dataset files against pip
-----Updating libraries-titanic in Kaggle
isaacflath/libraries-titanic update complete
Model Weights
push_fastai_learner
push_fastai_learner (learner, model_fname, version_comment, cfg_path='.', **kwargs)
Exports a learner and updates kaggle dataset
| | Type | Default | Details |
|---|---|---|---|
| learner | | | Fastai Learner |
| model_fname | | | ie model1.pkl |
| version_comment | | | dataset versioning |
| cfg_path | str | . | path to fastkaggle.json file or None |
| kwargs | | | |
from fastai.vision.all import *
import pandas as pd
path = untar_data(URLs.MNIST_SAMPLE)
df = pd.read_csv(path/'labels.csv')
dls = ImageDataLoaders.from_df(df,path)
learn = vision_learner(dls, models.resnet18, loss_func=CrossEntropyLossFlat(), ps=0.25)
push_fastai_learner(learn,'model1.pkl','testing fastkaggle')
path = Path('models-titanic')
assert path.exists()
assert ds_exists('isaacflath/models-titanic')
Path(path).ls().map(lambda x: x.unlink())
Path(path).rmdir()
[W NNPACK.cpp:51] Could not initialize NNPACK! Reason: Unsupported hardware.
-----Downloading or Creating Dataset if needed
models-titanic
isaacflath/models-titanic update complete