ez_kaggle.dataset

API details for using Kaggle datasets to store competition-related artifacts (model weights, pip libraries, etc.).
from fastcore.foundation import L

Foundation


source

ds_exists

 ds_exists (dataset_slug, path='.')

Check if a dataset exists

Type Default Details
dataset_slug Dataset slug (e.g. “zillow/zecon”)
path str . path to fastkaggle.json file or None
assert ds_exists('isaacflath/library-fastkaggle')
assert not ds_exists('not/real/dataset')

source

mk_dataset

 mk_dataset (dataset_path, title, force=False, upload=True, cfg_path='.',
             **kwargs)

Creates the minimal dataset metadata needed to push a new dataset to Kaggle

Type Default Details
dataset_path Local path to create dataset in
title Name of the dataset
force bool False Should it overwrite or error if it already exists?
upload bool True Should it upload and create the dataset on Kaggle?
cfg_path str . path to fastkaggle.json file or None
kwargs
mk_dataset('./testds','mytestds',force=True,upload=False)
path = Path('./testds/dataset-metadata.json')
md = json.load(open(path))
assert md['title'] == 'mytestds'
assert md['id'].endswith('/mytestds')
path.unlink()
path.parent.rmdir()
Data package template written to: testds/dataset-metadata.json

source

get_dataset

 get_dataset (dataset_slug, dataset_path, unzip=True, force=False)

Downloads an existing dataset and its metadata from Kaggle

Type Default Details
dataset_slug Dataset slug (e.g. “zillow/zecon”)
dataset_path Local path to download dataset to
unzip bool True Should it unzip after downloading?
force bool False Should it overwrite or error if dataset_path exists?
dataset_path = Path('./data-science-job-salaries')
get_dataset('ruchi798/data-science-job-salaries',dataset_path, force=True)

files = os.listdir(dataset_path)

assert L(files).sorted() == ['dataset-metadata.json', 'ds_salaries.csv']

for f in Path(dataset_path).ls(): f.unlink()
Path(dataset_path).rmdir()

source

push_dataset

 push_dataset (dataset_path, version_comment, quiet=True)

Pushes a dataset update to Kaggle. The dataset path must contain a dataset metadata file.

Type Default Details
dataset_path Local path where dataset is stored
version_comment Comment associated with this dataset update
quiet bool True

Pip Libraries


source

get_pip_library

 get_pip_library (pip_library, cfg_path='.', **kwargs)

Downloads the .whl files for pip_library and stores them in dataset_path

Type Default Details
pip_library name of library for pip to install
cfg_path str . path to fastkaggle.json file or None
kwargs
lib = 'fastcore'
get_pip_library(lib)
assert Path(lib).exists()
Path(lib).ls().map(lambda x: x.unlink())
Path(lib).rmdir()

source

get_pip_libraries

 get_pip_libraries (directory_name, cfg_path='.', **kwargs)
Type Default Details
directory_name
cfg_path str . path to fastkaggle.json file or None
kwargs
directory_name = 'my-test-libs'
get_pip_libraries('my-test-libs')
assert Path(directory_name).exists()
Path(directory_name).ls().map(lambda x: x.unlink())
Path(directory_name).rmdir()

source

get_local_ds_ver

 get_local_ds_ver (lib_path, lib)

Checks a local copy of a Kaggle dataset for a library's version number

Details
lib_path Local path dataset is stored in
lib Name of library (e.g. “fastcore”)

source

create_dependency_dataset

 create_dependency_dataset (version_notes='New Update', cfg_path='.',
                            **kwargs)
Type Default Details
version_notes str New Update
cfg_path str . path to fastkaggle.json file or None
kwargs
create_dependency_dataset()
path = Path('libraries-titanic')
assert path.exists()
assert ds_exists('isaacflath/libraries-titanic')
ds_exists('isaacflath/libraries-titanic')
Path(path).ls().map(lambda x: x.unlink())
Path(path).rmdir()
-----Downloading or Creating Dataset if needed
-----Checking dataset files against pip
-----Updating libraries-titanic in Kaggle
isaacflath/libraries-titanic update complete

Model Weights


source

push_fastai_learner

 push_fastai_learner (learner, model_fname, version_comment, cfg_path='.',
                      **kwargs)

Exports a learner and updates the Kaggle dataset

Type Default Details
learner Fastai Learner
model_fname e.g. model1.pkl
version_comment Comment associated with this dataset version
cfg_path str . path to fastkaggle.json file or None
kwargs
from fastai.vision.all import *
import pandas as pd

path = untar_data(URLs.MNIST_SAMPLE)
df = pd.read_csv(path/'labels.csv')
dls = ImageDataLoaders.from_df(df,path)
learn = vision_learner(dls, models.resnet18, loss_func=CrossEntropyLossFlat(), ps=0.25)

push_fastai_learner(learn,'model1.pkl','testing fastkaggle')

path = Path('models-titanic')
assert path.exists()
assert ds_exists('isaacflath/models-titanic')
Path(path).ls().map(lambda x: x.unlink())
Path(path).rmdir()
[W NNPACK.cpp:51] Could not initialize NNPACK! Reason: Unsupported hardware.
-----Downloading or Creating Dataset if needed
models-titanic
isaacflath/models-titanic update complete