Create a bar chart for the 20 most frequently used words
import matplotlib.pyplot as plt

# Take the 20 highest-count (word, count) pairs from the word-frequency
# Counter built in the "Find Word Frequencies" step.
a = transformed_word_frequency.most_common(20)

# Transpose the pairs into two parallel rows: the words and their counts.
# zip(*a) yields nothing when the Counter is empty, so fall back to two
# empty rows instead of failing with IndexError on bar_values[0].
bar_values = list(zip(*a)) or [(), ()]
x_val = list(bar_values[0])
y_val = list(bar_values[1])

plt.figure(figsize=(12, 8))  # Customize plot size
plt.barh(x_val, y_val, color='blue', height=0.3)
plt.xlabel("Word Counts")
# barh places the first entry at the bottom of the axis; invert the y-axis
# so the most frequent word is drawn at the top.
plt.gca().invert_yaxis()
Create a wordcloud chart for the extracted text data
Modify step 4, "Find Word Frequencies", so that it also collects every word into a single string, `word_str`:
# 4 Find Word Frequencies
from collections import Counter

# Hold our word counts in a Counter Object
transformed_word_frequency = Counter()

# Added: pieces of the string of all words. The list is seeded with the
# original initial value " " and joined once after the loop, which yields
# the same text as repeated `word_str += " " + clean_gram` without
# re-copying the growing string on every word.
word_parts = [" "]

# Apply filter list
for document in tdm_client.dataset_reader(dataset_file):
    if use_filtered_list is True:
        document_id = document['id']
        # Skip documents not in our filtered_id_list
        if document_id not in filtered_id_list:
            continue
    # Default to an empty dict (not a list) so .items() below still works
    # for a document that has no "unigramCount" entry.
    unigrams = document.get("unigramCount", {})
    for gram, count in unigrams.items():
        clean_gram = gram.lower()  # Lowercase the unigram
        # Added: record every word, before the stop-word and alphabetic
        # filters are applied, once per document it appears in.
        word_parts.append(clean_gram)
        if clean_gram in stop_words:  # Remove unigrams from stop words
            continue
        if not clean_gram.isalpha():  # Remove unigrams that are not purely alphabetic
            continue
        transformed_word_frequency[clean_gram] += count

# Added: string of all words
word_str = " ".join(word_parts)
# Install wordcloud
# NOTE(review): inside a Jupyter notebook, `%pip install wordcloud` targets the
# running kernel's environment -- confirm which form this notebook expects.
pip install wordcloud
# Import wordcloud and matplotlib for plotting the word cloud
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt