---- 淘宝优惠券 ---- 欢迎到论坛交流 ---- 由于域名问题,某些图片和 js 资源无法访问,导致一些代码实例无法运行!(将代码里的 gzui.net 换成 momen.vip 即可)

采集1024贴图区,并自动发布到WordPress博客

前端开发 蚂蚁 44℃ 0评论

https://www.moerats.com/archives/534/

https://github.com/ishkong/1024_img_spider_wordpress

import threading
import mimetypes,json
import requests
import re,os,time,pymysql

class wordpress_post:
    """Build the SQL statements used to publish one post into a WordPress database.

    The instance holds a title and a body; ``up`` and ``cat`` return complete
    SQL strings that the caller passes to ``cursor.execute``.
    """

    # Characters that must be backslash-escaped inside a quoted MySQL string
    # literal. NOTE(review): assumes the server does not run with the
    # NO_BACKSLASH_ESCAPES sql_mode - confirm.
    _SQL_ESCAPES = str.maketrans({
        '\0': '\\0',
        '\\': '\\\\',
        '\n': '\\n',
        '\r': '\\r',
        '\x1a': '\\Z',
        '"': '\\"',
        "'": "\\'",
    })

    def __init__(self,tittle,content):
        """Store the post title (``tittle``) and the post body (``content``)."""
        self.tittle=tittle
        self.content=content

    @classmethod
    def _escape(cls,value):
        """Return ``str(value)`` made safe for embedding in a quoted SQL literal."""
        return str(value).translate(cls._SQL_ESCAPES)

    def mysql_con(self):
        """Open and return a new pymysql connection."""
        conn = pymysql.connect(host='', port=3306, user='', passwd='', db='', charset='utf8')  # fill in the database settings here
        return conn

    def up(self):
        """Return the ``INSERT INTO wp_posts`` statement for this post.

        The current local time is used for both ``post_date`` and
        ``post_modified``; the title is also used as ``post_name``. Title and
        content are escaped, so quotes or backslashes in scraped text can
        neither break the statement nor inject SQL.
        """
        times=time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
        title=self._escape(self.tittle)
        content=self._escape(self.content)
        sql="INSERT INTO wp_posts(post_author,post_date,post_content,post_title,post_excerpt,post_status,comment_status,ping_status,post_name,to_ping,pinged,post_modified,post_content_filtered,post_parent,menu_order,post_type,comment_count) VALUES ('1','%s','%s','%s','','publish','open','open','%s','','','%s','','0','0','post','0')" % (str(times),content,title,title,str(times))
        return sql

    def cat(self,ids,cat):
        """Return the ``INSERT INTO wp_term_relationships`` statement.

        ``ids`` is the post id and ``cat`` the term taxonomy id. Both are
        interpolated unquoted, so they are coerced to ``int`` first.

        Raises:
            ValueError / TypeError: if either value is not an integer.
        """
        sql="INSERT INTO wp_term_relationships(object_id,term_taxonomy_id,term_order) VALUES (%s,%s,'0')"%(int(ids),int(cat))
        return sql

    def close_mysql(self,cursor,conn):
        """Commit the transaction, then close ``cursor`` and ``conn``.

        Both are closed even when the commit (or the cursor close) raises.
        """
        try:
            conn.commit()
        finally:
            try:
                cursor.close()
            finally:
                conn.close()

def upload(files):
    """POST ``files`` to the image host's upload API and return the parsed JSON reply.

    ``files`` is handed to ``requests.post`` unchanged as its ``files``
    argument. The response body is decoded with ``json.loads``.
    """
    api_key = ""  # put the image host API key here
    reply_format = "json"
    # Image host upload endpoint.
    endpoint = "".join(("http://你的域名/api/1/upload/?key=", api_key, "&format=", reply_format))
    response = requests.post(endpoint, files = files)
    return json.loads(response.text)

def formatSource(filename):
    """Wrap ``filename`` as a one-element multipart ``files`` list.

    Returns ``[('source', (filename, <binary file object>, mime_type))]``,
    where ``mime_type`` is guessed from the file name and may be ``None``.
    The file is opened for binary reading and is not closed here.
    """
    mime_type = mimetypes.guess_type(filename)[0]
    handle = open(filename , 'rb')
    part = (filename , handle , mime_type)
    return [('source' , part)]

class myThread (threading.Thread):
    """Thread that downloads a single image via ``download_pic``."""

    def __init__(self, url, dir, filename):
        """Remember the download arguments; ``filename`` doubles as ``threadID``."""
        super().__init__()
        self.url = url
        self.dir = dir
        self.filename = filename
        self.threadID = filename

    def run(self):
        """Thread body: download ``self.url`` using the stored arguments."""
        download_pic(self.url, self.dir, self.filename)

def download_pic(url,dir,filename):
    """Fetch ``url`` and save the body as ``temp/<filename>.jpg``.

    Nothing is written unless the response status is 200. ``dir`` is accepted
    but not used by this function.
    """
    request_headers={
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36Name',
        'Referer':'https://t66y.com',
    }
    response=requests.get(url=url,headers=request_headers)
    if response.status_code!=200:
        return
    target='temp/'+str(filename)+'.jpg'
    with open(target,'wb') as out:
        out.write(response.content)

def upload_img(page,tittle):
    """Upload ``temp/0.jpg`` .. ``temp/<page-1>.jpg`` and collect ``<img>`` tags.

    For each index, the local file is uploaded through ``upload``; on success
    the local file is deleted and an ``<img>`` tag pointing at the returned
    URL is appended to ``temp/temp.txt``. An index whose file is missing or
    whose upload fails is skipped.

    Args:
        page: number of indices to try (converted with ``int``).
        tittle: not used by this function.

    Raises:
        KeyError: if a decoded upload response has no ``['image']['url']``.
    """
    for i in range(int(page)):
        file_s='temp/'+str(i)+'.jpg'
        print('开始上传图片:'+file_s)
        try:
            b=upload(formatSource(file_s))
        except Exception:
            # Best effort: skip this image. Unlike a bare ``except:``, this
            # lets KeyboardInterrupt/SystemExit stop the program.
            continue
        os.remove(file_s)
        img_hc='<img src="'+b['image']['url']+'">'
        with open('temp/temp.txt','a+') as f:
            f.write(img_hc)

def post_article(tittle):
    """Publish the collected images as one WordPress post and bump the counter.

    Reads the post body from ``temp/temp.txt`` and the running post count from
    ``temp/tj.txt``, inserts the post and its category relationship, then
    deletes ``temp/temp.txt`` and writes the incremented count back.

    Args:
        tittle: sequence whose first element is used as the post title.

    Raises:
        Whatever the file reads, ``int`` conversion or database calls raise;
        the cursor and connection are closed before a database error
        propagates.
    """
    with open('temp/temp.txt','r') as f:
        wz_content=f.read()
    with open('temp/tj.txt','r') as f:
        wzs=f.read()
    k=int(wzs)
    a=wordpress_post(str(tittle[0]),wz_content)
    conn=a.mysql_con()
    try:
        cursor = conn.cursor()
    except Exception:
        conn.close()
        raise
    try:
        cursor.execute(a.up())
        new_id = cursor.lastrowid
        # Post category: look up the id you need in wp_term_taxonomy.
        cursor.execute(a.cat(new_id,'1'))
    except Exception:
        # Do not commit a half-written post; just release the resources.
        try:
            cursor.close()
        finally:
            conn.close()
        raise
    a.close_mysql(cursor,conn)
    print('文章已发布:'+tittle[0])
    os.remove('temp/temp.txt')
    k+=1
    with open('temp/tj.txt','w+') as f:
        f.write(str(k))

def main():
    """Crawl listing pages 1..270 and republish every thread found.

    For each thread: download all images in parallel (one ``myThread`` per
    image, saved as ``temp/1.jpg`` .. ``temp/N.jpg``), upload them with
    ``upload_img``, publish the post with ``post_article``, then sleep 60
    seconds. A thread whose upload step fails, or that yields no uploaded
    image, is skipped.
    """
    base_url='https://t66y.com/'
    for flag in range(1,271):
        page_url='https://t66y.com/thread0806.php?fid=8&search=&page='+str(flag)
        get=requests.get(page_url)
        listing_html=str(get.content,'gbk',errors='ignore')
        article_url=re.findall(r'<h3><a href="(.*)" target="_blank" id="">(?!<.*>).*</a></h3>',listing_html)
        for url in article_url:
            getpage=requests.get(str(base_url)+str(url))
            # Decode the page once and reuse it for both patterns.
            page_html=str(getpage.content,'gbk',errors='ignore')
            # Fall back to a default title so a page without <h4> does not
            # raise IndexError below.
            tittle=re.findall(r'<h4>(.*)</h4>',page_html) or ['default']
            file=tittle[0]
            img_url=re.findall(r'<input src=\'(.*?)\'',page_html)
            print('开始下载:'+file)
            threads=[]
            # Downloaded files are numbered from 1.
            for filename,download_url in enumerate(img_url,start=1):
                thread=myThread(download_url,file,filename)
                thread.start()
                threads.append(thread)
            for t in threads:
                t.join()
            print('下载完成,共'+str(len(img_url))+'张图片')
            print('开始上传图片')
            # A previous failed upload may have left partial content behind;
            # remove it so it is not mixed into this post.
            if os.path.exists('temp/temp.txt'):
                os.remove('temp/temp.txt')
            try:
                # upload_img walks indices 0..page-1, so pass N+1 to reach N.jpg.
                upload_img(len(img_url)+1,tittle)
            except Exception:
                print('图床错误')
                continue
            if not os.path.exists('temp/temp.txt'):
                # Nothing was uploaded; post_article would fail reading the file.
                print('没有成功上传的图片,跳过发布:'+file)
                continue
            post_article(tittle)
            time.sleep(60)
        print('第'+str(flag)+'页下载完成')

if __name__=='__main__':
    # Script entry point: make sure the working directory and the post counter
    # file exist, then run the crawler.
    try:
        os.makedirs('temp',exist_ok=True)
        # Create the counter whenever it is missing, not only when the
        # directory itself had to be created.
        if not os.path.exists('temp/tj.txt'):
            with open('temp/tj.txt','w+') as f:
                f.write('0')
        main()
    except Exception as exc:
        print('主程序出错,请重新运行')
        # Show what went wrong instead of hiding it behind the generic message.
        print(repr(exc))

#tj.txt里记录了采集了多少篇,可以用来更新wp_term_taxonomy中对应项的count值(如果这个分类里只有采集的话)

转载请注明:有爱前端 » 采集1024贴图区,并自动发布到WordPress博客

喜欢 (0)or分享 (0)
发表我的评论
取消评论

表情