Source code for wordclouds
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import Counter
import re
def create_wordcloud(data, stopwords=("the", "a", "or", "tai", "and", "ja", "to", "on", "in", "of", "for", "is", "i", "this", "http", "www", "fi", "com")):
    """Draw a word cloud from the ``text_content`` of every item in *data*.

    The ``text_content`` values are lower-cased, joined with single spaces
    and passed to ``wordcloud.WordCloud`` (white background, 800x400). The
    result is shown with matplotlib in a 15x10 figure with the axes hidden.

    Args:
        data: Any iterable (list, generator, iterator, ...) of mappings
            that each have a ``'text_content'`` string entry.
        stopwords: Iterable of words to leave out of the cloud. Each entry
            is passed through ``str()`` before being handed to WordCloud.

    Returns:
        None. If *data* yields no items, ``"Dataset empty."`` is printed
        and nothing is drawn.

    Raises:
        KeyError: If an item has no ``'text_content'`` entry.
    """
    # Collect the texts in one pass so one-shot iterables (generators,
    # iterators, map objects) are supported, not just sized containers.
    texts = [d['text_content'].lower() for d in data]
    if not texts:
        print("Dataset empty.")
        return

    # Imported here, after the empty check, so the plotting stack is only
    # loaded when there is something to draw. ``plt`` was previously used
    # without being imported anywhere in the visible file.
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    text = ' '.join(texts).strip()

    # A real set rather than a lazy ``map`` object: on Python 3 a map can be
    # consumed only once. ``str()`` is kept from the original (the module
    # enables ``unicode_literals``).
    stopwords = {str(w) for w in stopwords}

    wc = WordCloud(background_color="white", width=800, height=400, stopwords=stopwords)
    wc.generate(text)

    plt.figure(figsize=(15, 10))
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.show()