Open
Description
Hi, there,
What a user-friendly notebook! But I have just encountered an error when running
# Load the promoter dataset from the InstaDeep Hugging Face resources
dataset_name = "promoter_all"
train_dataset_promoter = load_dataset(
"InstaDeepAI/nucleotide_transformer_downstream_tasks_revised",
dataset_name,
split="train",
streaming= False,trust_remote_code=True
)
test_dataset_promoter = load_dataset(
"InstaDeepAI/nucleotide_transformer_downstream_tasks",
dataset_name,
split="test",
streaming= False,trust_remote_code=True
)
And the error like:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[24], line 5
3 # Load the promoter dataset from the InstaDeep Hugging Face ressources
4 dataset_name = "promoter_all"
----> 5 train_dataset_promoter = load_dataset(
6 "InstaDeepAI/nucleotide_transformer_downstream_tasks_revised",
7 dataset_name,
8 split="train",
9 streaming= False,trust_remote_code=True
10 )
11 test_dataset_promoter = load_dataset(
12 "InstaDeepAI/nucleotide_transformer_downstream_tasks",
13 dataset_name,
14 split="test",
15 streaming= False,trust_remote_code=True
16 )
File /public/home/hhl/miniconda3/envs/transformer/lib/python3.9/site-packages/datasets/load.py:2129, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, keep_in_memory, save_infos, revision, token, streaming, num_proc, storage_options, trust_remote_code, **config_kwargs)
2124 verification_mode = VerificationMode(
2125 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
2126 )
2128 # Create a dataset builder
-> 2129 builder_instance = load_dataset_builder(
2130 path=path,
2131 name=name,
2132 data_dir=data_dir,
2133 data_files=data_files,
2134 cache_dir=cache_dir,
2135 features=features,
2136 download_config=download_config,
2137 download_mode=download_mode,
2138 revision=revision,
2139 token=token,
2140 storage_options=storage_options,
2141 trust_remote_code=trust_remote_code,
2142 _require_default_config_name=name is None,
2143 **config_kwargs,
2144 )
2146 # Return iterable dataset in case of streaming
2147 if streaming:
File /public/home/hhl/miniconda3/envs/transformer/lib/python3.9/site-packages/datasets/load.py:1886, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, storage_options, trust_remote_code, _require_default_config_name, **config_kwargs)
1884 builder_cls = get_dataset_builder_class(dataset_module, dataset_name=dataset_name)
1885 # Instantiate the dataset builder
-> 1886 builder_instance: DatasetBuilder = builder_cls(
1887 cache_dir=cache_dir,
1888 dataset_name=dataset_name,
1889 config_name=config_name,
1890 data_dir=data_dir,
1891 data_files=data_files,
1892 hash=dataset_module.hash,
1893 info=info,
1894 features=features,
1895 token=token,
1896 storage_options=storage_options,
1897 **builder_kwargs,
1898 **config_kwargs,
1899 )
1900 builder_instance._use_legacy_cache_dir_if_possible(dataset_module)
1902 return builder_instance
TypeError: 'NoneType' object is not callable
And I have tried pip install sentencepiece
before loading the dataset. I wonder how I can load the dataset on my local server; I have tried, but it could not find the available files at all.
All the best
Metadata
Metadata
Assignees
Labels
No labels