Examples

Find mentions of cats in YLE data and print their count

This code goes through all of the YLE data, searches for the word ‘kissa’ (Finnish for “cat”), and prints the number of items found.

from hybra import core

# Directory the data files named below are looked up in.
core.set_data_path('./data/')

# Load the YLE news data from yle.json.
yle = core.data( 'news', folder = '', terms = ['yle.json'] )

# Keep only the items whose 'text' field matches the search word 'kissa'.
yle_cats = core.filter_by( yle, 'text', text = ['kissa'] )

# Print how many items matched. The call form with a single argument behaves
# the same under Python 2 and Python 3, whereas the bare `print x` statement
# is a syntax error on Python 3.
print( len( yle_cats ) )

Find the top 10 most common authors and print them

This code goes through all of the YLE data, counts how many times each author appears, and prints the ten most common ones together with their counts.

from hybra import core

# Directory the data files named below are looked up in.
core.set_data_path('./data/')

# Load the YLE news data from yle.json.
data = core.data( 'news', folder = '', terms = ['yle.json'] )

# Tally the 'author' field, then take the ten entries with the highest counts.
authors = core.counts( data, 'author' )
topauthors = authors.most_common(10)

# Each entry unpacks to (author name, count). The call form of print with a
# single pre-formatted string works under both Python 2 and Python 3.
for name, number in topauthors:
    print( "%s (%s)" % (name, number) )

Search for a keyword in a large number of data sources

This code goes through several data sources and stores the outputs in pickle format.

# coding=UTF8

import os, sys
import pickle

from hybra import core

# Search term(s). The value is split on commas below, so several terms can be
# given as 'term1,term2'. It is also used as the stem of the output file name.
keyword = 'ja'

core.set_data_path('./data/')

# Split once and reuse the same term list for every data source.
search_terms = keyword.split(',')

# Source 1: news data from yle.json, filtered on the 'text' field.
d = core.data( 'news', folder = '', terms = ['yle.json'] )
sample1 = core.filter_by( d, 'text', text = search_terms )

# Source 2: Facebook data from facebook.json, filtered the same way.
d = core.data( 'facebook', folder = '', terms = ['facebook.json'] )
sample2 = core.filter_by( d, 'text', text = search_terms )

# Store the combined results in '<keyword>.pickle'. Pickle output is a byte
# stream, so the file is opened in binary mode ('wb'); text mode fails on
# Python 3 and can corrupt the data on Windows. The `with` block guarantees
# the file is flushed and closed even if pickling raises.
with open( keyword + '.pickle', 'wb' ) as output_file:
    pickle.dump( sample1 + sample2, output_file )