Issues in the file data folder monument 300 zip
shubham9455999082 opened this issue · 1 comments
shubham9455999082 commented
There are some issues in the file build_vocab in the data folder's monument 300 zip file: some libraries it needs are not mentioned.
import numpy as np
from tensorflow.contrib import learn
import sys
from importlib import reload
def read_sentences(path):
    """Return the lines of the text file *path* with the newline stripped.

    The file is decoded as UTF-8 explicitly, which replaces the Python 2
    ``sys.setdefaultencoding`` / ``unicode()`` calls that do not exist on
    Python 3. Only the line terminator is removed, so a final line that
    lacks a trailing newline keeps its last character.
    """
    with open(path, encoding="utf-8") as corpus:
        return [line.rstrip("\n") for line in corpus]


def words_by_id(vocab_dict):
    """Return the keys of *vocab_dict* ordered by ascending value (id).

    The word with id ``i`` ends up at index ``i`` of the returned list
    when the ids are the consecutive integers starting at 0. An empty
    mapping yields an empty list.
    """
    return [word for word, _ in sorted(vocab_dict.items(), key=lambda kv: kv[1])]


def main(argv=None):
    """Print the vocabulary of a text file, one word per line, in id order.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the path of the text
    file to read (one document per line). Returns a process exit status:
    0 on success, 2 when the input path is missing.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        # argv[0] is the script itself, so it must not be used as the corpus.
        prog = args[0] if args else "build_vocab"
        print("usage: {} <text-file>".format(prog), file=sys.stderr)
        return 2

    x_text = read_sentences(args[1])
    # e.g. x_text = ['This is a cat', 'This must be boy', 'This is a a dog']
    if not x_text:
        # Nothing to build a vocabulary from; max() below would fail.
        return 0

    max_document_length = max(len(x.split(" ")) for x in x_text)
    # Create the VocabularyProcessor object, setting the max length of the
    # documents.
    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    # Fit the vocabulary on the documents. The transformed id sequences are
    # consumed but not needed afterwards.
    list(vocab_processor.fit_transform(x_text))
    # Extract the word -> id mapping from the object.
    vocab_dict = vocab_processor.vocabulary_._mapping

    for word in words_by_id(vocab_dict):
        print(word)
    return 0


if __name__ == "__main__":
    sys.exit(main())
mommi84 commented
Please ignore that file. All Python files are in the root folder as of NSpM v1.0.