Witllm/dataset/MNBVC.py

6 lines
166 B
Python

from datasets import load_dataset
# Open the "wikipedia" configuration of the "liwu/MNBVC" dataset, train split.
# streaming=True is passed so records are consumed by iterating over the
# returned object.
dataset = load_dataset(
    "liwu/MNBVC",
    "wikipedia",
    streaming=True,
    split="train",
)

# Print the first record the dataset yields.
print(next(iter(dataset)))