Marathi Wikipedia dump dataset for natural language processing (NLP).
# Download from https://dumps.wikimedia.org/mrwiki/
import bz2
import xml.etree.ElementTree as ET
# with bz2.open('mrwiki-latest-pages-articles.xml.bz2', 'rt') as f:
#     tree = ET.iterparse(f, events=('start', 'end'))
print("Download Marathi Wikipedia dump from dumps.wikimedia.org/mrwiki/")

| Field | Type | Description |
|---|---|---|
| text | string | Raw Wikipedia dump text in Marathi |
| title | string | Article title |
| id | string | Wikipedia article ID |