dawenl/vae_cf

Error in python 3.5+

karthikraja95 opened this issue · 7 comments

Hi all,

Is there a way to get the hit ratio along with ndcg from the code? Any help would be appreciated.

Thanks

The NDCG computation is in the file 'eval_funcs.py'. A hit ratio (HR) function would take the same inputs as the NDCG function, so you could implement a function with the same signature and then call it wherever the NDCG function is called.

Another question: when I try to load the train.csv file, I get the following error. Any idea why this happens and how to resolve it?

Screen Shot 2019-07-10 at 4 06 38 PM

I solved the above issue, but now I have another one. I am using Python 3.5+, and when I run the following block I get the error below; I have attached the traceback here. Any help would be appreciated.

Code:

np.seterr(divide='ignore', invalid='ignore') #Added by me; NOTE(review): this only silences NumPy's warnings, NaN results are still produced
ndcgs_vad = []
# Train for n_epochs; after each epoch compute validation NDCG, log it, and checkpoint the best model so far.
with tf.Session() as sess:
# NOTE(review): the lines below presumably belong inside the `with` body; their indentation appears to have been lost in the paste.
init = tf.global_variables_initializer()
sess.run(init)

best_ndcg = -np.inf

update_count = 0.0

for epoch in list(range(n_epochs)):
    np.random.shuffle(list(idxlist))  # NOTE(review): shuffles a temporary copy; idxlist itself keeps its order
    # train for one epoch
    for bnum, st_idx in enumerate(range(0, N, batch_size)):
        end_idx = min(st_idx + batch_size, N)
        X = train_data[idxlist[st_idx:end_idx]]
        
        if sparse.isspmatrix(X):
            X = X.toarray()
        X = X.astype('float32')           
        # anneal grows linearly with the number of updates until it reaches anneal_cap
        if total_anneal_steps > 0:
            anneal = min(anneal_cap, 1. * update_count / total_anneal_steps)
        else:
            anneal = anneal_cap
        
        feed_dict = {vae.input_ph: X, 
                     vae.keep_prob_ph: 0.5, 
                     vae.anneal_ph: anneal,
                     vae.is_training_ph: 1}        
        sess.run(train_op_var, feed_dict=feed_dict)
        # every 100th batch, also evaluate and write the merged training summaries
        if bnum % 100 == 0:
            summary_train = sess.run(merged_var, feed_dict=feed_dict)
            summary_writer.add_summary(summary_train, 
                                       global_step=epoch * batches_per_epoch + bnum) 
        
        update_count += 1
    
    # compute validation NDCG
    ndcg_dist = []
    for bnum, st_idx in enumerate(range(0, N_vad, batch_size_vad)):
        end_idx = min(st_idx + batch_size_vad, N_vad)
        X = vad_data_tr[idxlist_vad[st_idx:end_idx]]

        if sparse.isspmatrix(X):
            X = X.toarray()
        X = X.astype('float32')
    
        pred_val = sess.run(logits_var, feed_dict={vae.input_ph: X} )
        # exclude examples from training and validation (if any)
        pred_val[X.nonzero()] = -np.inf
        ndcg_dist.append(NDCG_binary_at_k_batch(pred_val, vad_data_te[idxlist_vad[st_idx:end_idx]]))
    # NOTE(review): if any entry of ndcg_dist is NaN (presumably users with no held-out items, i.e. 0/0 inside the NDCG helper - confirm), mean() is NaN too and the histogram summary fed below rejects the array
    ndcg_dist = np.concatenate(ndcg_dist)
    ndcg_ = ndcg_dist.mean()
    ndcgs_vad.append(ndcg_)
    merged_valid_val = sess.run(merged_valid, feed_dict={ndcg_var: ndcg_, ndcg_dist_var: ndcg_dist})
    summary_writer.add_summary(merged_valid_val, epoch)

    # update the best model (if necessary)
    if ndcg_ > best_ndcg:  # NOTE(review): always False when ndcg_ is NaN, so no checkpoint is saved in that case
        saver.save(sess, '{}/model'.format(chkpt_dir))
        best_ndcg = ndcg_

Error Traceback:


InvalidArgumentError Traceback (most recent call last)
~.conda\envs\reco_base\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1333 try:
-> 1334 return fn(*args)
1335 except errors.OpError as e:

~.conda\envs\reco_base\lib\site-packages\tensorflow\python\client\session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1318 return self._call_tf_sessionrun(
-> 1319 options, feed_dict, fetch_list, target_list, run_metadata)
1320

~.conda\envs\reco_base\lib\site-packages\tensorflow\python\client\session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1406 self._session, options, feed_dict, fetch_list, target_list,
-> 1407 run_metadata)
1408

InvalidArgumentError: Nan in summary histogram for: ndcg_at_k_hist_validation
[[{{node ndcg_at_k_hist_validation}} = HistogramSummary[T=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ndcg_at_k_hist_validation/tag, _arg_Placeholder_1_0_0)]]

During handling of the above exception, another exception occurred:

InvalidArgumentError Traceback (most recent call last)
in
58 ndcg_ = ndcg_dist.mean()
59 ndcgs_vad.append(ndcg_)
---> 60 merged_valid_val = sess.run(merged_valid, feed_dict={ndcg_var: ndcg_, ndcg_dist_var: ndcg_dist})
61 summary_writer.add_summary(merged_valid_val, epoch)
62

~.conda\envs\reco_base\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
927 try:
928 result = self._run(None, fetches, feed_dict, options_ptr,
--> 929 run_metadata_ptr)
930 if run_metadata:
931 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~.conda\envs\reco_base\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1150 if final_fetches or final_targets or (handle and feed_dict_tensor):
1151 results = self._do_run(handle, final_targets, final_fetches,
-> 1152 feed_dict_tensor, options, run_metadata)
1153 else:
1154 results = []

~.conda\envs\reco_base\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1326 if handle is None:
1327 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1328 run_metadata)
1329 else:
1330 return self._do_call(_prun_fn, handle, feeds, fetches)

~.conda\envs\reco_base\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1346 pass
1347 message = error_interpolation.interpolate(message, self._graph)
-> 1348 raise type(e)(node_def, op, message)
1349
1350 def _extend_graph(self):

InvalidArgumentError: Nan in summary histogram for: ndcg_at_k_hist_validation
[[node ndcg_at_k_hist_validation (defined at :9) = HistogramSummary[T=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ndcg_at_k_hist_validation/tag, _arg_Placeholder_1_0_0)]]

Caused by op 'ndcg_at_k_hist_validation', defined at:
File "C:\Users\Karthik.conda\envs\reco_base\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "C:\Users\Karthik.conda\envs\reco_base\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
app.start()
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
self.io_loop.start()
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\platform\asyncio.py", line 148, in start
self.asyncio_loop.run_forever()
File "C:\Users\Karthik.conda\envs\reco_base\lib\asyncio\base_events.py", line 438, in run_forever
self._run_once()
File "C:\Users\Karthik.conda\envs\reco_base\lib\asyncio\base_events.py", line 1451, in _run_once
handle._run()
File "C:\Users\Karthik.conda\envs\reco_base\lib\asyncio\events.py", line 145, in _run
self._callback(*self._args)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
lambda f: self._run_callback(functools.partial(callback, future))
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
ret = callback()
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\gen.py", line 781, in inner
self.run()
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\gen.py", line 742, in run
yielded = self.gen.send(value)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\ipykernel\kernelbase.py", line 378, in dispatch_queue
yield self.process_one()
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\gen.py", line 225, in wrapper
runner = Runner(result, future, yielded)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\gen.py", line 708, in __init__
self.run()
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\gen.py", line 742, in run
yielded = self.gen.send(value)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
yield gen.maybe_future(dispatch(*args))
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\gen.py", line 209, in wrapper
yielded = next(result)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\ipykernel\kernelbase.py", line 272, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\gen.py", line 209, in wrapper
yielded = next(result)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\ipykernel\kernelbase.py", line 542, in execute_request
user_expressions, allow_stdin,
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tornado\gen.py", line 209, in wrapper
yielded = next(result)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\IPython\core\interactiveshell.py", line 2848, in run_cell
raw_cell, store_history, silent, shell_futures)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\IPython\core\interactiveshell.py", line 2874, in _run_cell
return runner(coro)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
coro.send(None)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\IPython\core\interactiveshell.py", line 3049, in run_cell_async
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\IPython\core\interactiveshell.py", line 3214, in run_ast_nodes
if (yield from self.run_code(code, result)):
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\IPython\core\interactiveshell.py", line 3296, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "", line 9, in
ndcg_dist_summary = tf.summary.histogram('ndcg_at_k_hist_validation', ndcg_dist_var)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tensorflow\python\summary\summary.py", line 187, in histogram
tag=tag, values=values, name=scope)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tensorflow\python\ops\gen_logging_ops.py", line 283, in histogram_summary
"HistogramSummary", tag=tag, values=values, name=name)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
op_def=op_def)
File "C:\Users\Karthik.conda\envs\reco_base\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Nan in summary histogram for: ndcg_at_k_hist_validation
[[node ndcg_at_k_hist_validation (defined at :9) = HistogramSummary[T=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ndcg_at_k_hist_validation/tag, _arg_Placeholder_1_0_0)]]

When I print

sess.run(train_op_var, feed_dict=feed_dict)

in the above code, I get None.

Any help would be appreciated. I am using a different dataset, but I formatted it exactly like the MovieLens dataset, except that it has no timestamp. It has userid, custid, and ratings.

I get the same error and I don't understand why.
Did you manage to solve it?

I have the same error too.
Have you found a solution yet?