getSourceCode HTTP 403 Error
Opened this issue · 1 comment
In crawler.py, get_source_code fails for the URL http://scholar.harvard.edu/rogoff/contact_owner.
It raises the error below:
Traceback (most recent call last):
File "C:/Users/Yasar/Documents/PyCharm Projects/bil372/main.py", line 21, in <module>
print(c.run())
File "C:\Users\Yasar\Documents\PyCharm Projects\bil372\crawler.py", line 78, in run
self.traverse()
File "C:\Users\Yasar\Documents\PyCharm Projects\bil372\crawler.py", line 62, in traverse
self.__p = Parser(source=self.get_source_code(URL), URL=URL)
File "C:\Users\Yasar\Documents\PyCharm Projects\bil372\crawler.py", line 24, in get_source_code
response = urllib.request.urlopen(request)
File "C:\Python34\lib\urllib\request.py", line 161, in urlopen
return opener.open(url, data, timeout)
File "C:\Python34\lib\urllib\request.py", line 469, in open
response = meth(req, response)
File "C:\Python34\lib\urllib\request.py", line 579, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python34\lib\urllib\request.py", line 507, in error
return self._call_chain(*args)
File "C:\Python34\lib\urllib\request.py", line 441, in _call_chain
result = func(*args)
File "C:\Python34\lib\urllib\request.py", line 587, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
http://scholar.harvard.edu/rogoff/home
Traceback (most recent call last):
File "/home/wdnch/PycharmProjects/bil372/main.py", line 21, in <module>
print(c.run())
File "/home/wdnch/PycharmProjects/bil372/crawler.py", line 78, in run
self.traverse()
File "/home/wdnch/PycharmProjects/bil372/crawler.py", line 62, in traverse
self.__p = Parser(source=self.get_source_code(URL), URL=URL)
File "/home/wdnch/PycharmProjects/bil372/crawler.py", line 24, in get_source_code
response = urllib.request.urlopen(request)
File "/usr/lib/python3.4/urllib/request.py", line 161, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.4/urllib/request.py", line 469, in open
response = meth(req, response)
File "/usr/lib/python3.4/urllib/request.py", line 579, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.4/urllib/request.py", line 507, in error
return self._call_chain(*args)
File "/usr/lib/python3.4/urllib/request.py", line 441, in _call_chain
result = func(*args)
File "/usr/lib/python3.4/urllib/request.py", line 587, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
Process finished with exit code 1