Anorov/cloudflare-scrape

Error getting HTML of page

ahmedmostafa0x61 opened this issue · 2 comments

Before creating an issue, first upgrade cfscrape with pip install -U cfscrape and see if you're still experiencing the problem. Please also confirm your Node version (node --version or nodejs --version) is version 10 or higher.

Make sure the website you're having issues with is actually using anti-bot protection by Cloudflare and not a competitor like Imperva Incapsula or Sucuri. And if you're using an anonymizing proxy, a VPN, or Tor, Cloudflare often flags those IPs and may block you or present you with a captcha as a result.

Please confirm the following statements and check the boxes before creating an issue:

  • I've upgraded cfscrape with pip install -U cfscrape
  • I'm using Node version 10 or higher
  • The site protection I'm having issues with is from Cloudflare
  • I'm not using Tor, a VPN, or an anonymizing proxy

Python version number

Run python --version and paste the output below:

Python 3.6.8

cfscrape version number

Run pip show cfscrape and paste the output below:

Version: 2.1.1

Code snippet involved with the issue

import cfscrape
URL = "https://eg.mycima.co/%d9%85%d8%b4%d8%a7%d9%87%d8%af%d8%a9-%d9%85%d8%b3%d9%84%d8%b3%d9%84-%d8%a7%d9%84%d9%86%d9%87%d8%a7%d9%8a%d8%a9-%d8%ad%d9%84%d9%82%d8%a9-1/"

scraper = cfscrape.create_scraper()
print(scraper.get(URL).content)

Complete exception and traceback

(If the problem doesn't involve an exception being raised, leave this blank)

Traceback (most recent call last):
  File "C:\Python\lib\site-packages\cfscrape\__init__.py", line 255, in solve_challenge
    javascript, flags=re.S
AttributeError: 'NoneType' object has no attribute 'groups'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/Ahmed Mostafa/.PyCharmCE2018.1/config/scratches/scratch_76.py", line 8, in <module>
    print(scraper.get(URL).content)
  File "C:\Python\lib\site-packages\requests\sessions.py", line 546, in get
    return self.request('GET', url, **kwargs)
  File "C:\Python\lib\site-packages\cfscrape\__init__.py", line 129, in request
    resp = self.solve_cf_challenge(resp, **kwargs)
  File "C:\Python\lib\site-packages\cfscrape\__init__.py", line 204, in solve_cf_challenge
    answer, delay = self.solve_challenge(body, domain)
  File "C:\Python\lib\site-packages\cfscrape\__init__.py", line 292, in solve_challenge
    % BUG_REPORT
ValueError: Unable to identify Cloudflare IUAM Javascript on website. Cloudflare may have changed their technique, or there may be a bug in the script.

Please read https://github.com/Anorov/cloudflare-scrape#updates, then file a bug report at https://github.com/Anorov/cloudflare-scrape/issues."

URL of the Cloudflare-protected page

[eg.mycima.co]

URL of Pastebin/Gist with HTML source of protected page

[view-source:https://eg.mycima.co/%d9%85%d8%b4%d8%a7%d9%87%d8%af%d8%a9-%d9%85%d8%b3%d9%84%d8%b3%d9%84-%d8%a7%d9%84%d9%86%d9%87%d8%a7%d9%8a%d8%a9-%d8%ad%d9%84%d9%82%d8%a9-1/]

No one looking into this?

The JavaScript code I get is:

//<![CDATA[
(function(){

var a = function() {try{return !!window.addEventListener} catch(e) {return !1} },
b = function(b, c) {a() ? document.addEventListener("DOMContentLoaded", b, c) : document.attachEvent("onreadystatechange", b)};
b(function(){
  var cookiesEnabled=(navigator.cookieEnabled)? true : false;
  var cookieSupportInfix=cookiesEnabled?'/nocookie':'/cookie';
  var a = document.getElementById('cf-content');a.style.display = 'block';
  var isIE = /(MSIE|Trident\/|Edge\/)/i.test(window.navigator.userAgent);
  var trkjs = isIE ? new Image() : document.createElement('img');
  trkjs.setAttribute("src", "/cdn-cgi/images/trace/jschal/js"+cookieSupportInfix+"/transparent.gif?ray=59af47b9c81c0b5b");
  trkjs.id = "trk_jschal_js";
  trkjs.setAttribute("alt", "");
  document.body.appendChild(trkjs);

  setTimeout(function(){
    var s,t,o,p, b,r,e,a,k,i,n,g,f, RGQhuJk={"z":+((!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+(!![])-[])+(!+[]-(!![]))+(!+[]+(!![])+!![]+!![])+(+!![])+(!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+(!![])-[])+(!+[]+(!![])+!![]))/+((!+[]+(!![])+!![]+!![]+[])+(!+[]+(!![])+!![]+!![]+!![]+!![]+!![])+(+!![])+(+!![])+(!+[]+(!![])+!![])+(!+[]+(!![])+!![]+!![])+(!+[]+(!![])+!![]+!![])+(+!![])+(!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![]+!![]))};
    g = String.fromCharCode;
    o = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    e = function(s) {
      s += "==".slice(2 - (s.length & 3));
      var bm, r = "", r1, r2, i = 0, r3 = '<span class="cf-error-code">1020</span>';
      for (; i < s.length;) {
          bm = o.indexOf(s.charAt(i++)) << 18 | o.indexOf(s.charAt(i++)) << 12
                  | (r1 = o.indexOf(s.charAt(i++))) << 6 | (r2 = o.indexOf(s.charAt(i++)));
          r += r1 === 64 ? g(bm >> 16 & 255)
                  : r2 === 64 ? g(bm >> 16 & 255, bm >> 8 & 255)
                  : g(bm >> 16 & 255, bm >> 8 & 255, bm & 255);
      }
      return r;
    };
    t = document.createElement('div');
    t.innerHTML="<a href='/'>x</a>";
    t = t.firstChild.href;r = (setInterval(function(){}, 100),t.match(/https?:\/\//)[0]);
    t = t.substr(r.length); t = t.substr(0,t.length-1); k = 'nvXtXCab';
    a = document.getElementById('jschl+answer'.replace('+', '-'));
    f = document.getElementById('challenge-form'); a.value = '';
    ;RGQhuJk.zhuJk.zhuJk.z+=+((!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+(!![])+!![]+!![])+(+!![])+(!+[]-(!![]))+(!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+(!![])+!![])+(!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![])+(+!![]))/+((!+[]+(!![])-[]+[])+(!+[]+(!![])+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+(!![])+!![])+(!+[]-(!![]))+(!+[]+(!![])+!![]+!![])+(!+[]+(!![])-[])+(!+[]+(!![])+!![]+!![]+!![]+!![])+(!+[]+(!![])+!![]+!![]+!![]+!![])+(!+[]+(!![])-[]));RGQhuJk.zfunction(p){return eval((true+"")[0]+".ch"+(false+"")[1]+(true+"")[1]+Function("return escape")()(("")["italics"]())[2]+"o"+(undefined+"")[2]+(true+"")[3]+"A"+(true+"")[0]+"("+p+")")}(+((!+[]+(!![])+!![]+!![]+[])))));RGQhuJk.zhuJk.zhuJk.zhuJk.za.value = (+RGQhuJk.z).toFixed(10); '; 121'
    f.action += location.hash;
    return f.submit();
  },4000); /*eoc*/

}, false);

})();
//]]>

The regular expression in line 251 is too obfuscated for me to decipher.