wzc570738205/smartParsePro

省市县数据已经过期了:有些区县已被撤销,或者已经改名。

sleepm opened this issue · 0 comments

比如无锡梁溪区。
所以对应的邮编数据也有一部分是无效的。

(function() {
    'use strict';
    // Restore the crawl state persisted by earlier runs. Each value is stored as
    // JSON; an unset (or empty) key falls back to the given default.
    function read_stored(key, fallback){
        return localStorage[key] ? JSON.parse(localStorage[key]) : fallback;
    }
    var citydata = read_stored('citydata', []);
    var province_url = read_stored('province_url', []);
    var city_url = read_stored('city_url', []);
    var town_url = read_stored('town_url', []);
    var step = read_stored('step', 1);
    /**
     * Tell whether `v` occurs in `array`.
     *
     * @param {*} v - Value to look for; compared with loose equality (`==`).
     * @param {Array|Object} array - Collection to scan. Only its own enumerable
     *     entries are visited; `null`/`undefined` is treated as empty.
     * @param {string|false} [k=false] - When truthy, `entry[k]` is compared
     *     instead of the entry itself.
     * @returns {boolean} `true` as soon as one entry matches, otherwise `false`.
     */
    function in_array(v, array, k = false){
        if(array == null){
            return false;
        }
        // Object.values() only yields own entries, so enumerable properties that
        // a page may have added to Array.prototype are not mistaken for elements
        // (a for...in loop would visit them).
        return Object.values(array).some(function(entry){
            if(k){
                // A null/undefined entry has no properties and therefore cannot match.
                return entry != null && entry[k] == v;
            }
            return entry == v;
        });
    }
    /**
     * Find the key of the first entry of `array` that matches `v`.
     *
     * @param {*} v - Value to look for; compared with loose equality (`==`).
     * @param {Array|Object} array - Collection to scan. Only its own enumerable
     *     keys are visited; `null`/`undefined` is treated as empty.
     * @param {string|false} [k=false] - When truthy, `entry[k]` is compared
     *     instead of the entry itself.
     * @returns {string|undefined} The matching key as a string (e.g. `'1'`, as a
     *     for...in loop would produce it), or `undefined` when nothing matches.
     */
    function find_index(v, array, k =false){
        if(array == null){
            return undefined;
        }
        // Object.keys() skips inherited enumerable properties, unlike for...in.
        return Object.keys(array).find(function(key){
            var entry = array[key];
            if(k){
                // A null/undefined entry has no properties and therefore cannot match.
                return entry != null && entry[k] == v;
            }
            return entry == v;
        });
    }
    /**
     * Remove every entry that loosely equals `v` from `array`, in place.
     *
     * Array elements are spliced out, so the array shrinks and no holes are left
     * behind (holes would be written out as `null` by JSON.stringify).
     *
     * @param {*} v - Value to remove; compared with loose equality (`==`).
     * @param {Array|Object} array - Collection to modify. For a non-array object
     *     the matching own keys are deleted.
     * @returns {Array|Object} The same `array` object that was passed in.
     */
    function delete_in_array(v, array){
        if(array == null){
            return array;
        }
        if(!Array.isArray(array)){
            Object.keys(array).forEach(function(key){
                if(array[key] == v){
                    delete array[key];
                }
            });
            return array;
        }
        // Walk backwards so that splicing does not shift the indexes still to be visited.
        for(var i = array.length - 1; i >= 0; i--){
            if(array[i] == v){
                array.splice(i, 1);
            }
        }
        return array;
    }
    /**
     * Draw a pseudo-random integer between two bounds.
     *
     * @param {number} min - Lower bound; rounded up to an integer, inclusive.
     * @param {number} max - Upper bound; rounded down to an integer, exclusive.
     * @returns {number} An integer in the range [ceil(min), floor(max)).
     */
    function getRandomInt(min, max) {
        const lower = Math.ceil(min);
        const upper = Math.floor(max);
        const offset = Math.floor(Math.random() * (upper - lower));
        return offset + lower;
    }
    /**
     * Collect the province entries from the current page.
     *
     * Reads every `td a` link in the third <table> of the document. For each
     * link whose URL ends in "<digits>.html" (e.g. .../2019/22.html) it adds
     * `{code, name, children: []}` to `citydata` and the link URL to
     * `province_url`, skipping entries that are already present. Both lists are
     * then written to localStorage as JSON.
     */
    function process_province(){
        var a = document.querySelectorAll('table')[2].querySelectorAll('td a');
        // Index loop instead of for...in: for...in over a NodeList also visits
        // non-element keys such as "length" and "item".
        for(var i = 0; i < a.length; i++){
            var href = a[i].href;
            if(!href){
                continue;
            }
            // Take the code from the file name rather than from a fixed character
            // offset, so a different scheme, host or year in the URL still works.
            var found = href.match(/\/(\d+)\.html$/);
            if(!found){
                continue;
            }
            var one = {};
            one.code = found[1];
            one.name = a[i].text;
            one.children = [];
            if(!in_array(one.code, citydata, 'code')){
                citydata.push(one);
            }
            if(!in_array(href, province_url)){
                province_url.push(href);
            }
        }
        localStorage.setItem('province_url', JSON.stringify(province_url));
        localStorage.setItem('citydata', JSON.stringify(citydata));
    }

    /**
     * Collect the second-level entries listed on a province page.
     *
     * The province code is the two digits of the page's own file name
     * (".../<code>.html"). `citydata` is reloaded from localStorage, the matching
     * province entry is looked up, and every `td a` link in the third <table>
     * whose text contains a non-digit character is added to that province's
     * `children` as `{code, name, children: []}`; the link URL is added to
     * `city_url`. Entries already present are skipped. `city_url` and `citydata`
     * are then written back to localStorage.
     *
     * If the province is not present in the stored data, an error is logged and
     * nothing is changed.
     */
    function process_city(){
        // Tolerate a trailing query string or fragment after ".html".
        var page = location.href.match(/\/(\d{2})\.html(?:[?#].*)?$/);
        var city_index = page ? page[1] : '';
        citydata = JSON.parse(localStorage.citydata || '[]');
        var city_in_citydata_index = find_index(city_index, citydata, 'code');
        var city = citydata[city_in_citydata_index];
        if(!city){
            console.error('process_city: no entry with code "' + city_index + '" in citydata');
            return;
        }
        if(!city.children){
            city.children = [];
        }
        var a = document.querySelectorAll('table')[2].querySelectorAll('td a');
        // Index loop instead of for...in: for...in over a NodeList also visits
        // non-element keys such as "length" and "item".
        for(var ii = 0; ii < a.length; ii++){
            var text = a[ii].text;
            // Skip links whose text is empty or made of digits only
            // (presumably the numeric-code column - TODO confirm).
            if(!text || !text.match(/(\D|\W)/)){
                continue;
            }
            // Take the code from the file name rather than from a fixed character offset.
            var found = a[ii].href.match(/\/(\d+)\.html$/);
            if(!found){
                continue;
            }
            var one = {};
            one.code = found[1];
            one.name = text;
            one.children = [];
            if(!in_array(one.code, city.children, 'code')){
                city.children.push(one);
            }
            if(!in_array(a[ii].href, city_url)){
                city_url.push(a[ii].href);
            }
        }
        localStorage.setItem('city_url', JSON.stringify(city_url));
        localStorage.setItem('citydata', JSON.stringify(citydata));
    }
    /**
     * Collect the third-level entries listed on a second-level page.
     *
     * The page URL ends in ".../<2-digit code>/<4-digit code>.html"; the two
     * codes identify the first-level entry in `citydata` and the entry inside its
     * `children`. `citydata` is reloaded from localStorage, and every `td a` link
     * in the third <table> whose text contains a non-digit character is added to
     * the matched entry's `children` as `{code, name}`; the link URL is added to
     * `town_url`. Entries already present are skipped. `town_url` and `citydata`
     * are then written back to localStorage.
     *
     * If either level is not present in the stored data, an error is logged and
     * nothing is changed.
     */
    function process_town(){
        // Tolerate a trailing query string or fragment after ".html".
        var page = location.href.match(/\/(\d{2})\/(\d{4})\.html(?:[?#].*)?$/);
        var city_index = page ? page[1] : '';
        var town_index = page ? page[2] : '';
        citydata = JSON.parse(localStorage.citydata || '[]');
        var city_in_citydata_index = find_index(city_index, citydata, 'code');
        var city = citydata[city_in_citydata_index];
        var town;
        if(city && city.children){
            town = city.children[find_index(town_index, city.children, 'code')];
        }
        if(!town){
            console.error('process_town: no entry "' + city_index + '/' + town_index + '" in citydata');
            return;
        }
        if(!town.children){
            town.children = [];
        }
        var a = document.querySelectorAll('table')[2].querySelectorAll('td a');
        // Index loop instead of for...in: for...in over a NodeList also visits
        // non-element keys such as "length" and "item".
        for(var iii = 0; iii < a.length; iii++){
            var text = a[iii].text;
            // Skip links whose text is empty or made of digits only
            // (presumably the numeric-code column - TODO confirm).
            if(!text || !text.match(/(\D|\W)/)){
                continue;
            }
            // Take the code from the file name rather than from a fixed character offset.
            var found = a[iii].href.match(/\/(\d+)\.html$/);
            if(!found){
                continue;
            }
            var one = {};
            one.code = found[1];
            one.name = text;
            if(!in_array(one.code, town.children, 'code')){
                town.children.push(one);
            }
            if(!in_array(a[iii].href, town_url)){
                town_url.push(a[iii].href);
            }
        }
        localStorage.setItem('town_url', JSON.stringify(town_url));
        localStorage.setItem('citydata', JSON.stringify(citydata));
    }

    /**
     * Open the URLs in `arr` one by one in new windows, last element first,
     * waiting a random 1.5-5 s before each one.
     *
     * Each opened window handle is stored in `wo` and gets `process_message`
     * registered as its "message" listener. `arr` is consumed (emptied) in the
     * process. 'loop done' is logged once nothing is left.
     *
     * @param {string[]} arr - URLs still to open; mutated via pop().
     * @param {string|false} [save=false] - localStorage key under which the
     *     remaining list is stored as JSON after every pop. The key is passed on
     *     to every following iteration, so the stored list stays current and ends
     *     up empty when the loop finishes.
     */
    function loop(arr, save = false){
        // Nothing (left) to open: do not schedule a timer that would open a blank window.
        if(arr.length === 0){
            console.log('loop done');
            return;
        }
        setTimeout(function(){
            var v = arr.pop();
            wo = window.open(v, '_blank');
            if(wo){
                wo.addEventListener("message", process_message, false);
            }else{
                // window.open() returns null when the window could not be opened.
                console.error('loop: could not open ' + v);
            }
            if(save){
                localStorage.setItem(save, JSON.stringify(arr));
            }
            loop(arr, save);
        }, getRandomInt(1500, 5000));
    }

    /**
     * "message" event handler: when the event's payload is the string 'close',
     * closes the window currently referenced by `wo`. Any other payload is ignored.
     *
     * @param {MessageEvent} msg - The received message event; only `msg.data` is read.
     */
    function process_message(msg){
        if(msg.data != 'close'){
            return;
        }
        wo.close();
    }
    // Index page that lists all provinces:
    // http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/index.html
    if(document.location.href == 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/index.html'){
        // Handle of the most recently opened window; written by loop() and read
        // by process_message().
        var wo;
        // The three checks are independent `if`s, not an else-if chain, so a run
        // that starts at step 1 falls through steps 2 and 3 in the same pass.
        if(step == 1){
            process_province();
            step = 2;
            localStorage.setItem('step', 2);
        }
        if(step == 2){
            loop(province_url, 'province_url');
            step = 3;
            localStorage.setItem('step', 3);
        }
        if(step == 3){
            loop(city_url, 'city_url');
        }
    }

    // Province page: .../2019/<2 digits>.html (the dot is escaped so that only a
    // literal ".html" matches).
    if(document.location.href.match(/tjyqhdmhcxhfdm\/2019\/[0-9]{2}\.html/)){
        process_city();
        // NOTE(review): this posts to the page's own window object - confirm that
        // the listener registered by the opener actually receives it.
        window.postMessage('close', "http://www.stats.gov.cn");
    }

    // Second-level page: .../2019/<2 digits>/<4 digits>.html
    if(document.location.href.match(/tjyqhdmhcxhfdm\/2019\/[0-9]{2}\/[0-9]{4}\.html/)){
        process_town();
        // NOTE(review): same as above - confirm the opener receives this message.
        window.postMessage('close', "http://www.stats.gov.cn");
    }
})();
// Scrape postal codes from Wikipedia (run on the page below; requires jQuery as `$`).
//https://zh.wikipedia.org/wiki/%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD%E5%A2%83%E5%86%85%E5%9C%B0%E5%8C%BA%E9%82%AE%E6%94%BF%E7%BC%96%E7%A0%81%E5%88%97%E8%A1%A8
// Note: the same area name can occur more than once.
//
// Result: `postcode` maps an area name to the text of the table cell that
// follows it, and is printed to the console as JSON.
$('s').remove(); // struck-through (already removed) entries
$('td[colspan=8]').remove();
// Select the cells only after the removals above, so that the removed
// colspan=8 cells are not part of the scanned list.
var td = $('.wikitable').find('td');
var postcode = {};
// Index loop instead of for...in: for...in over a jQuery object also visits
// "length", "prevObject" and every jQuery method.
for(var i = 0; i < td.length; i++){
    var text = td[i].textContent;
    // An area cell is one whose text has no ASCII word character (letter, digit or underscore).
    if(!text || text.match(/\w/))continue;
    // Strip all whitespace (including a trailing newline) so the suffix test
    // below sees the real last character of the name.
    var area = text.replace(/\s+/g, '');
    if(area.length == 0)continue;
    // The code is read from the cell right after the area cell; an area in the
    // very last cell has no code and is skipped.
    var next = td[i + 1];
    if(!next)continue;
    var code = next.textContent.trim();
    var temp = area;
    if(!area.match(/(市|县|区)$/)){
        // Name without an administrative suffix: register it under the bare
        // name and under each suffixed / substituted variant.
        postcode[area] = code;
        area = temp + '市';
        postcode[area] = code;
        area = temp + '县';
        postcode[area] = code;
        area = temp + '区';
        postcode[area] = code;
        area = temp.replace('县', '市');
        postcode[area] = code;
        area = temp.replace('县', '');
        postcode[area] = code;
    }else{
        if(postcode[area]){
            // Duplicate name: keep the first code and report the clash.
            console.error(area);
        }else{
            postcode[area] = code;
        }
    }
}
console.log(JSON.stringify(postcode));

这是我写的油猴脚本,自己用是可以的。
做法是:在一个窗口里抓取链接,然后循环弹出新窗口去抓取下一级的链接。
邮编方面,除了维基里能查到的,其余的只能先列出所有邮编为空的地名,再去百度查。