File size: 553 Bytes
3992084
 
 
 
eb6656d
3992084
 
 
 
 
 
eb6656d
3992084
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from rdflib import Graph

# Downloaded from https://databus.dbpedia.org/dbpedia/text/short-abstracts
raw_data_path = 'data/raw/short-abstracts_lang=hu.ttl'
preprocessed_data_path = 'data/preprocessed/shortened_abstracts_hu_2021_09_01.txt'

g = Graph()
g.parse(raw_data_path, format='turtle')

i = 0
objects = []
with open(preprocessed_data_path, 'w') as f:
    print(len(g))
    for subject, predicate, object in g:
        objects.append(object.replace(' +/-','').replace('\n',' '))
        objects.append('\n')
        i += 1
    f.writelines(objects)