Fill in the query strings and swap in your own cookie. Don't scrape too much in one day or the account will get banned; around 20,000 results per day is a safe ceiling, and each individual query returns at most 10,000 records.

#!/usr/bin/env python3
# coding: utf-8
# author: IversOn5
from base64 import b64encode
import requests
import re
from random import randint
from time import sleep

queryList = [
    'app="wordpress" && port=80 && country=US && region=California && server=="Apache"',
    'app="wordpress" && port=80 && country=US && region=California && server=="cloudflare-nginx"'
]
#bquery='YXBwPSJWaWdvci1Sb3V0ZXIiICYmIGNvdW50cnk9R0IgICYmIHBvcnQ9ODAgJiYgcmVnaW9uPUxvbmRvbiwgQ2l0eSBvZiAmJiAtcG9ydD00NDM=   '
"""
app="wordpress" && port=8080  && -country=CN
"""
def spider(query):
    """Walk up to 1000 result pages for one FOFA query and save matching hosts."""
    counter = 0  # consecutive pages without results
    rex = re.compile('<a href="javascript:view(.*?)">')
    cookies = {'_fofapro_ars_session': '2e8c1a80cb584f1b689ce9046cfe4998'}  # replace with your own session cookie
    qbase64 = b64encode(query.encode()).decode()  # FOFA expects the query base64-encoded
    for page in range(1, 1001):
        print('Current query is: ' + query)
        print('\033[1;31;40mpage %s - - \033[0m' % page)
        x = requests.get('https://fofa.so/result?page=%s&qbase64=%s' % (page, qbase64), cookies=cookies)
        html = x.text
        if 'retry' in html:
            # Rate limited: back off for a bit, then retry the same page once.
            sleep(randint(10, 15))
            x = requests.get('https://fofa.so/result?page=%s&qbase64=%s' % (page, qbase64), cookies=cookies)
            html = x.text
        ip = rex.findall(html)
        if not ip:
            counter = counter + 1
            if counter == 5:  # five empty pages in a row: stop this query
                break
        else:
            counter = 0
            for i in ip:
                ips = i[2:-2]  # strip the ('...') wrapper from the matched link target
                https_geturl(ips)
            sleep(randint(3, 5))
def https_geturl(ip_re):
    """Record only plain-HTTP targets; anything already marked https is skipped."""
    if 'https' not in ip_re:
        write(ip_re)
        print(ip_re)
        # Optional: follow redirects first and record the final hostname instead.
        # if 'http://' not in ip_re:
        #     try:
        #         temp = requests.get('http://' + ip_re)
        #         url = temp.url.split('/')[2]
        #         print(url)
        #         write(url)
        #     except Exception:
        #         pass
        # else:
        #     try:
        #         temp = requests.get(ip_re)
        #         url = temp.url.split('/')[2]
        #         print(url)
        #         write(url)
        #     except Exception:
        #         pass
def write(ip):
    with open('wp_US.txt','a') as ipw:
        ipw.write(ip+'\n')
if __name__ == "__main__":
    for query in queryList:
        spider(query)
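
To sanity-check a query before a full run, you can build the same qbase64 value by hand and paste the resulting URL into a browser. A minimal sketch (the query below is only an example, not part of the script):

from base64 import b64encode

query = 'app="wordpress" && port=80 && country=US'  # any valid FOFA search expression
qbase64 = b64encode(query.encode()).decode()
print('https://fofa.so/result?qbase64=' + qbase64)

Because write() appends on every run, wp_US.txt will accumulate duplicates over time. A small post-processing sketch (the output file name wp_US_dedup.txt is my own choice):

# Deduplicate and sort the hosts collected by the script above.
with open('wp_US.txt') as f:
    hosts = sorted({line.strip() for line in f if line.strip()})
with open('wp_US_dedup.txt', 'w') as f:
    f.write('\n'.join(hosts) + '\n')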
