Examples¶
Find mentions of cats in Yle data and print their count¶
This code goes through all of the YLE data, searches for the word ‘kissa’ (Finnish for ‘cat’) and prints the number of items found.
# Count the items in the YLE data whose text matches 'kissa'.
from hybra import core

core.set_data_path('./data/')
yle = core.data('news', folder='', terms=['yle.json'])
yle_cats = core.filter_by(yle, 'text', text=['kissa'])
# The parenthesised form prints the same value on Python 2 and Python 3.
print(len(yle_cats))
Find the top 10 most common authors and print them¶
This code goes through all of the YLE data, counts how many items each author has and prints the ten most common authors with their counts.
# Count the 'author' field over the YLE data and print the ten most common.
from hybra import core

core.set_data_path('./data/')
data = core.data('news', folder='', terms=['yle.json'])
authors = core.counts(data, 'author')
# most_common(10) is unpacked below as (author name, count) pairs.
topauthors = authors.most_common(10)
for name, number in topauthors:
    # The loop body must be indented; the parenthesised print runs on
    # both Python 2 and Python 3.
    print("%s (%s)" % (name, number))
Search for a keyword in a large number of data sources¶
This code searches several data sources for a keyword and stores the combined results in a pickle file named after the keyword.
# coding=UTF8
# Filter two data sources by a keyword and pickle the combined matches
# into '<keyword>.pickle'.
import os, sys
import pickle
from hybra import core

keyword = 'ja'
# Split on commas so several keywords can be given in one string.
keywords = keyword.split(',')

core.set_data_path('./data/')

d = core.data('news', folder='', terms=['yle.json'])
sample1 = core.filter_by(d, 'text', text=keywords)

d = core.data('facebook', folder='', terms=['facebook.json'])
sample2 = core.filter_by(d, 'text', text=keywords)

# Pickle data is binary, so the file is opened in 'wb' mode (text mode
# fails on Python 3). The with-block closes the file once the dump is done.
# NOTE(review): `sample1 + sample2` assumes filter_by returns objects that
# support `+` (e.g. lists) - confirm against hybra.
with open(keyword + '.pickle', 'wb') as out:
    pickle.dump(sample1 + sample2, out)