I previously wrote a Feishu integration for sending messages; this script additionally crawls my blog, pulls out each article's title and link, and embeds them in the rich-text message.
- Note: it can only crawl articles rendered by Solo's built-in skins (Pinghsu, plus Casper as handled below), since the XPath selectors are tied to their markup.
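The full script further down relies on a few third-party packages (requests, lxml, BeautifulSoup), all visible in its imports; assuming a standard pip setup, `pip install requests lxml beautifulsoup4` pulls them in. First, a minimal crawler that detects the skin and walks the Casper pagination: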
import urllib.request
from lxml import etree
import re
class solo():
    def __init__(self, url):
        self.url = url
        self.Article_num = solo.get_ArticlesNums(self.url)

    def Theme(self):
        '''
        Detect the Solo skin: return 'Casper' or 'Pinghsu' if found,
        otherwise an empty string.
        :return:
        '''
        response = urllib.request.urlopen(self.url)
        html = response.read().decode()
        match = re.search('Casper|Pinghsu', html)
        return match.group() if match else ''
    def get_casper(self):
        titles_list = []
        links_list = []
        for i in range(1, self.Article_num + 1):
            url_1 = "https://www.cjzshilong.cn/?p=%s" % i  # URL of each paginated page
            response_1 = urllib.request.urlopen(url_1)
            html_1 = response_1.read().decode()
            parseHtml_1 = etree.HTML(html_1)
            article_num = len(parseHtml_1.xpath('//*[@id="pjax"]/div/div/article'))
            for n in range(1, article_num + 1):  # iterate over the articles on this page
                titles = parseHtml_1.xpath('//*[@id="pjax"]/div/div/article[%s]/div/h2/a/text()' % n)
                links = parseHtml_1.xpath('//*[@id="pjax"]/div/div/article[%s]/div/h2/a//@href' % n)
                new_titles = ''.join(titles).strip().rsplit('\n')
                links_list.append(links)
                titles_list.append(new_titles)
        return titles_list, links_list
    @staticmethod
    def get_ArticlesNums(url):
        response = urllib.request.urlopen(url)
        html = response.read().decode()
        parseHtml = etree.HTML(html)
        all_article = len(parseHtml.xpath('//*[@id="pjax"]/div/nav/a')) + 1  # total number of pages
        return all_article
if __name__ == '__main__':
url = "https://www.cjzshilong.cn"
info = solo(url=url)
theme = info.Theme()
if theme == 'Pinghsu':
print('123')
else:
print('Casper')
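For reference, a minimal sketch of consuming the parallel lists that get_casper() returns. Each entry is itself a list produced by the XPath query, hence the join; the two lists stay index-aligned because both appends happen in the same loop iteration:

    info = solo(url="https://www.cjzshilong.cn")
    titles, links = info.get_casper()
    for title, link in zip(titles, links):
        print(''.join(title), ''.join(link))

The full script below wires this crawler into the Feishu API: it looks up each user by mobile number, uploads a random image, scrapes a random sweet-talk line, and sends everything as one rich-text message.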
#!/usr/bin/env python3
########################################################
# This script sends Feishu messages to Lijuan regularly#
# Date: 2020-2-24 #
# Author: cuijianzhe #
# Email: 598941324@qq.com #
########################################################
import urllib.request
from lxml import etree
from bs4 import BeautifulSoup
import requests
import json
import os
import random
import re
class solo():
    def __init__(self, url):
        self.url = url
        self.Article_num = solo.get_ArticlesNums(self.url)

    def Theme(self):
        '''
        Detect the Solo skin: return 'Casper' or 'Pinghsu',
        whichever the page source contains, else an empty string.
        :return:
        '''
        response = urllib.request.urlopen(self.url)
        html = response.read().decode()
        match = re.search('Casper|Pinghsu', html)
        return match.group() if match else ''
    def get_pingsu(self):
        titles_list = []
        links_list = []
        for i in range(1, self.Article_num + 1):
            url_1 = "https://www.cjzshilong.cn/?p=%s" % i  # URL of each paginated page
            response_1 = urllib.request.urlopen(url_1)
            html_1 = response_1.read().decode()
            parseHtml_1 = etree.HTML(html_1)
            article_num = len(parseHtml_1.xpath('//*[@id="pjax"]/div/main/article'))
            for n in range(1, article_num + 1):  # iterate over the articles on this page
                titles = parseHtml_1.xpath('//*[@id="pjax"]/div/main/article[%s]/div/div[5]/h2/a/text()' % n)
                links = parseHtml_1.xpath('//*[@id="pjax"]/div/main/article[%s]/div/div[5]/h2/a//@href' % n)
                new_titles = ''.join(titles).strip().rsplit('\n')
                links_list.append(links)
                titles_list.append(new_titles)
        return titles_list, links_list
    def get_casper(self):
        titles_list = []
        links_list = []
        for i in range(1, self.Article_num + 1):
            url_1 = "https://www.cjzshilong.cn/?p=%s" % i  # URL of each paginated page
            response_1 = urllib.request.urlopen(url_1)
            html_1 = response_1.read().decode()
            parseHtml_1 = etree.HTML(html_1)
            article_num = len(parseHtml_1.xpath('//*[@id="pjax"]/div/div/article'))
            for n in range(1, article_num + 1):  # iterate over the articles on this page
                titles = parseHtml_1.xpath('//*[@id="pjax"]/div/div/article[%s]/div/h2/a/text()' % n)
                links = parseHtml_1.xpath('//*[@id="pjax"]/div/div/article[%s]/div/h2/a//@href' % n)
                new_titles = ''.join(titles).strip().rsplit('\n')
                links_list.append(links)
                titles_list.append(new_titles)
        return titles_list, links_list
    @staticmethod
    def get_ArticlesNums(url):
        response = urllib.request.urlopen(url)
        html = response.read().decode()
        parseHtml = etree.HTML(html)
        all_article = len(parseHtml.xpath('//*[@id="pjax"]/div/nav/a')) + 1  # total number of pages
        return all_article
class feishu():
    def __init__(self, mobile):
        self.mobile = mobile
        self.token = feishu.get_token()

    def getloverwords(self):
        texts = []
        for i in range(1, random.randint(3, 83)):  # crawl a random number of pages from the sweet-talk section
            url = 'https://www.duanwenxue.com/huayu/tianyanmiyu/list_{}.html'.format(i)
            response = requests.get(url)
            texts.append(response.text)
        articles = []
        for text in texts:
            soup = BeautifulSoup(text, 'lxml')
            arttis = soup.find('div', class_='list-short-article').find_all('a', {'target': "_blank"})  # the sweet-talk entries
            # collect the text of every <a> tag, stripped of surrounding whitespace
            articles.extend([a.text.strip() for a in arttis])
        todaywords = articles[random.randint(0, len(articles) - 1)]  # pick one line at random
        return todaywords
    def getuserid(self):
        headers_group = {
            "Authorization": "Bearer %s" % self.token,
            "Content-Type": "application/json"
        }
        try:
            userurl = "https://open.feishu.cn/open-apis/user/v1/batch_get_id?mobiles=%s" % self.mobile
            res_data = requests.get(url=userurl, headers=headers_group)
            res_json = res_data.json()
            if res_json.get('code') == 0:
                userid = res_json['data']['mobile_users'][self.mobile][0]['user_id']
                return userid
            else:
                print('Request error: {}'.format(res_json.get('msg')))
        except Exception as e:
            print('Request failed: {}'.format(e))
    def uploadimg(self):
        imgname = random.choice(os.listdir('/scripts/feishu/images'))
        with open("/scripts/feishu/images/%s" % imgname, 'rb') as p:
            image = p.read()
        imgurl = "https://open.feishu.cn/open-apis/image/v4/put/"
        headers = {"Authorization": "Bearer %s" % self.token}
        files = {
            'image': image
        }
        imgdata = {
            "image_type": "message"
        }
        resp = requests.post(url=imgurl, headers=headers, files=files, data=imgdata)
        resp.raise_for_status()  # fail before deleting the local file if the upload went wrong
        os.remove('/scripts/feishu/images/%s' % imgname)
        content = resp.json()
        return content['data']['image_key']
    def sendmess(self, title, link, user_id, image_key=None):
        headers_group = {
            "Authorization": "Bearer %s" % self.token,
            "Content-Type": "application/json"
        }
        message_url = "https://open.feishu.cn/open-apis/message/v4/send/"
        # send a rich-text (post) message
        data = {
            "user_id": user_id,
            "msg_type": "post",
            "content": {
                "post": {
                    "zh_cn": {
                        "title": "表情包来了",
                        "content": [
                            [
                                {
                                    "tag": "text",
                                    "un_escape": True,
                                    "text": "%s " % self.getloverwords()
                                },
                            ],
                            [
                                {
                                    "tag": "text",
                                    "un_escape": True,
                                    "text": "博文推荐 :"
                                },
                                {
                                    "tag": "a",
                                    "text": "%s" % title,
                                    "href": "%s" % link
                                },
                                {
                                    "tag": "at",
                                    "user_id": user_id
                                }
                            ],
                            [
                                {
                                    "tag": "img",
                                    "image_key": image_key,
                                    "width": 1200,
                                    "height": 1200
                                }
                            ]
                        ]
                    }
                }
            }
        }
        requests.post(url=message_url, headers=headers_group, json=data)
    @staticmethod  # static method: callable from the class or an instance
    def get_token():  # no self or cls parameter is passed
        data = {"app_id": "cli_9exxxxd", "app_secret": "YJJxxxxYUi"}
        headers = {"Content-Type": "application/json"}
        url_token = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        try:
            res = requests.post(url_token, json=data, headers=headers)
            if res.status_code == 200:
                return res.json().get('tenant_access_token')
        except Exception as e:
            print('Request failed: {}'.format(e))
if __name__ == '__main__':
    url = "https://www.cjzshilong.cn"
    solo_info = solo(url=url)
    theme = solo_info.Theme()
    mobiles = ["186xxxx6142", "178xxxx4553"]
    for mobile in mobiles:
        if theme == 'Casper':
            res_info = solo_info.get_casper()  # article titles and links for the Casper skin
        else:
            res_info = solo_info.get_pingsu()  # article titles and links for the Pinghsu skin
        num = random.randint(0, len(res_info[0]) - 1)  # randint is inclusive, so stay within the list bounds
        title = ''.join(res_info[0][num])
        link = ''.join(res_info[1][num])
        feishu_res = feishu(mobile)
        userID = feishu_res.getuserid()
        imgkey = feishu_res.uploadimg()
        feishu_res.sendmess(title, link, userID, imgkey)
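The "regularly" in the header comment implies a scheduler. As a sketch, assuming the script is saved at the hypothetical path /scripts/feishu/send_feishu.py (matching the image directory used above), a crontab entry firing every morning at nine could look like:

    0 9 * * * /usr/bin/env python3 /scripts/feishu/send_feishu.py

Below is an alternative take on the crawler: instead of regex-matching the page source, Theme() reads the skin name out of the footer text via XPath.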
import urllib.request
from lxml import etree
class solo():
    def __init__(self, url):
        self.url = url
        self.Article_num = solo.get_ArticlesNums(self.url)

    def Theme(self):
        '''
        Detect the Solo skin: return the footer text naming the theme
        (non-empty for Casper), otherwise an empty string.
        :return:
        '''
        response = urllib.request.urlopen(self.url)
        html = response.read().decode()
        parseHtml = etree.HTML(html)
        theme = parseHtml.xpath('/html/body/footer/div/div[1]/text()[5]')  # theme name in the footer
        info = ''.join(''.join(theme).strip().rsplit('\n'))
        return info
    def get_pingsu(self):
        titles_list = []
        links_list = []
        for i in range(1, self.Article_num + 1):
            url_1 = "https://www.cjzshilong.cn/?p=%s" % i  # URL of each paginated page
            response_1 = urllib.request.urlopen(url_1)
            html_1 = response_1.read().decode()
            parseHtml_1 = etree.HTML(html_1)
            article_num = len(parseHtml_1.xpath('//*[@id="pjax"]/div/main/article'))
            for n in range(1, article_num + 1):  # iterate over the articles on this page
                titles = parseHtml_1.xpath('//*[@id="pjax"]/div/main/article[%s]/div/div[5]/h2/a/text()' % n)
                links = parseHtml_1.xpath('//*[@id="pjax"]/div/main/article[%s]/div/div[5]/h2/a//@href' % n)
                new_titles = ''.join(titles).strip().rsplit('\n')
                links_list.append(links)
                titles_list.append(new_titles)
        return titles_list, links_list
    def get_casper(self):
        titles_list = []
        links_list = []
        for i in range(1, self.Article_num + 1):
            url_1 = "https://www.cjzshilong.cn/?p=%s" % i  # URL of each paginated page
            response_1 = urllib.request.urlopen(url_1)
            html_1 = response_1.read().decode()
            parseHtml_1 = etree.HTML(html_1)
            article_num = len(parseHtml_1.xpath('//*[@id="pjax"]/div/div/article'))
            for n in range(1, article_num + 1):  # iterate over the articles on this page
                titles = parseHtml_1.xpath('//*[@id="pjax"]/div/div/article[%s]/div/h2/a/text()' % n)
                links = parseHtml_1.xpath('//*[@id="pjax"]/div/div/article[%s]/div/h2/a//@href' % n)
                new_titles = ''.join(titles).strip().rsplit('\n')
                links_list.append(links)
                titles_list.append(new_titles)
        return titles_list, links_list
    @staticmethod
    def get_ArticlesNums(url):
        response = urllib.request.urlopen(url)
        html = response.read().decode()
        parseHtml = etree.HTML(html)
        all_article = len(parseHtml.xpath('//*[@id="pjax"]/div/nav/a')) + 1  # total number of pages
        return all_article

    def main(self):
        return self.Theme()
if __name__ == '__main__':
url = "https://www.cjzshilong.cn"
info = solo(url=url)
theme = info.main()
if theme is not "":
res = info.get_casper()
else:
res = info.get_pingsu()
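Both detection strategies shown above get the job done: the regex version simply searches the raw page source for the skin name, while the XPath version depends on the footer markup staying exactly where it is. The regex approach is arguably more robust to layout changes, as long as the theme name appears somewhere in the page.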