Skip to content

参考资料:商丘市教育体育局鼠标移动设置Cookie #12

@anxiangyipiao

Description

@anxiangyipiao

import requests
import re

# Page requested by both first_cookie() and request_with_cookie().
url = (
    "https://jytyj.shangqiu.gov.cn"
    "/zwgk/fdzdgknr/zfcg31sqsjytyj/zbgg31sqsjytyj_2"
)

# Headers sent with the initial (cookie-less) request in first_cookie().
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.110 Safari/537.3"
    ),
}

def first_cookie(timeout=10):
    """Fetch the page and extract the cookie set by its inline JavaScript.

    The response body is searched for the first ``return "<...>"``
    statement. The text before the first ``;`` inside that string literal
    is returned, which drops trailing attributes such as the path.

    Args:
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The extracted cookie string, or ``None`` when the response body
        contains no matching ``return "..."`` statement.
    """
    res = requests.get(url, headers=headers, timeout=timeout)

    # NOTE(review): the status code is deliberately not checked, matching the
    # original behaviour; the script-bearing response may not be a 200, so
    # confirm against the live site before adding raise_for_status().
    match = re.search(r'return\s*"([^"]+)"', res.text)
    if match is None:
        return None

    # Keep only the leading part of the string; the rest is path etc.
    return match.group(1).split(';')[0].strip()

def xpath_with_response(res):
    """Parse a response body as HTML and select the list entries via XPath.

    Args:
        res: Response object whose ``text`` attribute holds the HTML.

    Returns:
        The result of evaluating ``//ul[@class="infoList"]/li`` against
        the parsed document.
    """
    from lxml import etree

    # Build the tree and run the query in a single expression.
    return etree.HTML(res.text).xpath('//ul[@class="infoList"]/li')

def request_with_cookie(cookie, timeout=10):
    """Request the page with the given cookie and print each entry found.

    For every node returned by ``xpath_with_response`` the text and
    ``href`` of its direct ``<a>`` child are printed.

    Args:
        cookie: Value sent verbatim as the ``Cookie`` request header.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` can block indefinitely on a stalled connection.
    """
    # Named so it does not shadow the module-level ``headers`` dict.
    request_headers = {
        'Cookie': cookie,
    }

    response = requests.get(url, headers=request_headers, timeout=timeout)

    # ``items`` rather than ``list`` so the builtin is not shadowed.
    items = xpath_with_response(response)

    for item in items:
        titles = item.xpath('./a/text()')
        links = item.xpath('./a/@href')
        if not titles or not links:
            # Entry without a direct <a> child (or without anchor text):
            # skip it instead of failing with IndexError on [0].
            continue
        title, link = titles[0], links[0]
        print(f"标题: {title}, 链接: {link}")

# Usage example

# The guard must compare __name__ with "__main__"; the bare ``name == "main"``
# form referenced an undefined name and raised NameError.
if __name__ == "__main__":
    cookie = first_cookie()
    print(f"提取的Cookie: {cookie}")
    if cookie:
        # Only issue the second request when a cookie was actually extracted.
        request_with_cookie(cookie)
    else:
        print("未能提取到Cookie")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions