
Commit c513cb0

Analysis.py
1 parent 0a88244 commit c513cb0

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
import io
import unicodedata
import re
import string
from numpy import linalg
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tokenize import PunktSentenceTokenizer
from nltk.corpus import webtext
from nltk.stem.porter import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer

# Requires the NLTK resources: punkt, wordnet, averaged_perceptron_tagger,
# and vader_lexicon (see the setup snippet below the script).

# Read the review text; the file is stored in a Latin-2 (ISO-8859-2) encoding.
with open('kindle.txt', encoding='ISO-8859-2') as f:
    text = f.read()

# Train an unsupervised Punkt sentence tokenizer on the text itself.
sent_tokenizer = PunktSentenceTokenizer(text)
sents = sent_tokenizer.tokenize(text)

# Word- and sentence-level tokenization with the default NLTK tokenizers.
print(word_tokenize(text))
print(sent_tokenize(text))

# Stemming: reduce each token to its Porter stem.
porter_stemmer = PorterStemmer()
nltk_tokens = nltk.word_tokenize(text)
for w in nltk_tokens:
    print("Actual: %s  Stem: %s" % (w, porter_stemmer.stem(w)))

# Lemmatization: reduce each token to its WordNet lemma.
wordnet_lemmatizer = WordNetLemmatizer()
nltk_tokens = nltk.word_tokenize(text)
for w in nltk_tokens:
    print("Actual: %s  Lemma: %s" % (w, wordnet_lemmatizer.lemmatize(w)))

# Part-of-speech tagging of the tokenized text.
tokens = nltk.word_tokenize(text)
print(nltk.pos_tag(tokens))

# Sentiment analysis with VADER, scoring the file line by line.
sid = SentimentIntensityAnalyzer()
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

with open('kindle.txt', encoding='ISO-8859-2') as f:
    for line in f.read().split('\n'):
        print(line)
        scores = sid.polarity_scores(line)
        for key in sorted(scores):
            print('{0}: {1}, '.format(key, scores[key]), end='')
        print()
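
The script assumes the NLTK data packages it relies on are already installed. A minimal one-time setup, assuming a standard NLTK installation and a local kindle.txt review file alongside the script, might look like this:

import nltk

# One-time downloads of the resources the script uses: the Punkt sentence
# models, WordNet data for the lemmatizer, the averaged-perceptron POS
# tagger, and the VADER sentiment lexicon.
for resource in ('punkt', 'wordnet', 'averaged_perceptron_tagger', 'vader_lexicon'):
    nltk.download(resource)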
