本文将详细介绍如何使用Python爬取美团商家订单。以下是具体步骤和细节解析:
一、登录美团商家平台获取cookies
要爬取美团商家订单,我们需要先登录美团商家平台获取登录后的cookies。简单来说,cookie是一种在客户端存储数据的技术。在我们访问网站的时候,网站会将一些信息存储在我们的电脑上,比如登录状态等。在这里,我们需要使用的是美团商家平台登录后的cookies。
import requests
# Request the merchant login page once and keep whatever cookies the server sets.
login_url = 'https://merchant.meituan.com/account/login'
# requests.session() is only a backwards-compatibility alias; Session() is the
# documented constructor.
sess = requests.Session()
# Without a timeout the call can block forever on an unresponsive server.
resp = sess.get(login_url, timeout=10)
# NOTE: a plain GET does not authenticate, so this dict holds only the cookies
# issued while loading the login page.
cookies = sess.cookies.get_dict()
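我们使用requests库向美团商家登录界面发送请求,获取登录页面的cookies。需要注意的是,这一步得到的只是访问登录页时服务器下发的cookies,并不代表已经登录;要获得登录后的cookies,还需要完成实际的登录操作(例如在浏览器中登录后,从开发者工具中复制cookies)。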
二、模拟登录并获取美团商家订单
有了美团商家平台登录后的cookies,我们就可以进行模拟登录并获取商家订单了。
import hashlib
import time
def get_sign(params, app_secret):
    """Return the MD5 hex digest used as the request signature.

    Only the keys 'waimai-bid', 'wmpoiid' and 'time' take part in the
    signature. Their values are converted to strings and concatenated in
    ascending key order, ``app_secret`` is appended, and the encoded result
    is hashed with MD5.

    Args:
        params: Mapping of request parameters.
        app_secret: Secret string appended after the signed values.

    Returns:
        The 32-character hexadecimal MD5 digest.
    """
    signed_keys = ('waimai-bid', 'wmpoiid', 'time')
    pieces = [str(params[name]) for name in sorted(params) if name in signed_keys]
    pieces.append(app_secret)
    return hashlib.md5(''.join(pieces).encode()).hexdigest()
def get_orders(start_time, end_time, shop_id, app_key, app_secret, cookies,
               limit=20, offset=0, timeout=30):
    """Fetch one page of orders from the paging endpoint and return its JSON.

    Args:
        start_time: Start of the query range. Its string form is sent as
            ``startTime`` and its first 10 characters are used as the
            ``date`` value of the referer header.
        end_time: End of the query range, sent as ``endTime``.
        shop_id: Shop identifier; converted with ``int()``.
        app_key: Value sent as ``appKey``.
        app_secret: Secret handed to ``get_sign`` to sign the request.
        cookies: Mapping that must contain ``'wmPoiId'``; only that cookie
            is forwarded in the request headers.
        limit: Page size sent as ``limit`` (default 20).
        offset: Paging offset sent as ``offset`` (default 0).
        timeout: Seconds passed to ``requests.get`` so an unresponsive
            server cannot block the caller forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        KeyError: If ``cookies`` has no ``'wmPoiId'`` entry.
        ValueError: If ``shop_id`` cannot be converted to an integer.
    """
    order_url = "https://merchant.meituan.com/order/api/v1/order/paging"
    # Coerce once so the referer slice works for any value the params accept.
    start_time = str(start_time)
    headers = {
        'referer': 'https://merchant.meituan.com/order/history?timeline=day&date=' + start_time[:10],
        'cookie': 'wmPoiId=' + cookies['wmPoiId'],
        'x-requested-with': 'XMLHttpRequest',
    }
    params = {
        'startTime': start_time,
        'endTime': str(end_time),
        'shopId': int(shop_id),
        'limit': limit,
        'offset': offset,
        'appKey': app_key,
        # Current time in milliseconds.
        'time': int(time.time() * 1000),
    }
    # The signature is computed before 'sign' itself is added to the params.
    params['sign'] = get_sign(params, app_secret)
    resp = requests.get(order_url, headers=headers, params=params, timeout=timeout)
    return resp.json()
使用requests库发送请求,将cookies中的wmPoiId、时间段、店铺ID以及签名等参数传递给API接口,即可获取商家订单数据。
三、解析订单数据并存储
获取到的商家订单数据是JSON格式的响应(经resp.json()解析后为Python字典),我们需要对其进行解析并存储为Excel文件。
import pandas as pd
def parse_orders(orders):
    """Flatten an order response into a DataFrame with one row per order.

    Args:
        orders: Mapping whose ``['data']['orders']`` entry is an iterable of
            order dicts. Each order supplies ``delivery_id``, ``status`` and
            a ``caidan`` list; only the first ``caidan`` element is read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``platform_order_id``,
        ``dinner_date``, ``dinner_time``, ``total``, ``actual_pay`` and
        ``order_status``.
    """
    def to_row(entry):
        order_id = entry['delivery_id']
        first_item = entry['caidan'][0]
        return {
            'platform_order_id': order_id,
            'dinner_date': first_item['day'],
            'dinner_time': first_item['time'],
            # 'totle_price' is the key spelling read from the response.
            'total': first_item['totle_price'],
            'actual_pay': first_item['real_price'],
            'order_status': entry['status'],
        }

    rows = [to_row(entry) for entry in orders['data']['orders']]
    return pd.DataFrame(rows)
def save_to_excel(df, file_name):
    """Write ``df`` to ``<file_name>.xlsx`` without the index column.

    Args:
        df: The ``pandas.DataFrame`` to export.
        file_name: Output path without extension; ``'.xlsx'`` is appended.
    """
    # ExcelWriter.save() was removed in pandas 2.0. The context manager saves
    # and closes the workbook on exit, including when to_excel raises.
    with pd.ExcelWriter(file_name + '.xlsx') as writer:
        df.to_excel(writer, index=False)
我们使用pandas库解析JSON格式的订单数据,并将其保存为Excel文件。
四、完整代码示例
以下是完整的代码示例:
import requests
import hashlib
import time
import pandas as pd
def get_sign(params, app_secret):
    """Return the MD5 hex digest used as the request signature.

    Only the keys 'waimai-bid', 'wmpoiid' and 'time' take part in the
    signature. Their values are converted to strings and concatenated in
    ascending key order, ``app_secret`` is appended, and the encoded result
    is hashed with MD5.

    Args:
        params: Mapping of request parameters.
        app_secret: Secret string appended after the signed values.

    Returns:
        The 32-character hexadecimal MD5 digest.
    """
    signed_keys = ('waimai-bid', 'wmpoiid', 'time')
    pieces = [str(params[name]) for name in sorted(params) if name in signed_keys]
    pieces.append(app_secret)
    return hashlib.md5(''.join(pieces).encode()).hexdigest()
def get_orders(start_time, end_time, shop_id, app_key, app_secret, cookies,
               limit=20, offset=0, timeout=30):
    """Fetch one page of orders from the paging endpoint and return its JSON.

    Args:
        start_time: Start of the query range. Its string form is sent as
            ``startTime`` and its first 10 characters are used as the
            ``date`` value of the referer header.
        end_time: End of the query range, sent as ``endTime``.
        shop_id: Shop identifier; converted with ``int()``.
        app_key: Value sent as ``appKey``.
        app_secret: Secret handed to ``get_sign`` to sign the request.
        cookies: Mapping that must contain ``'wmPoiId'``; only that cookie
            is forwarded in the request headers.
        limit: Page size sent as ``limit`` (default 20).
        offset: Paging offset sent as ``offset`` (default 0).
        timeout: Seconds passed to ``requests.get`` so an unresponsive
            server cannot block the caller forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        KeyError: If ``cookies`` has no ``'wmPoiId'`` entry.
        ValueError: If ``shop_id`` cannot be converted to an integer.
    """
    order_url = "https://merchant.meituan.com/order/api/v1/order/paging"
    # Coerce once so the referer slice works for any value the params accept.
    start_time = str(start_time)
    headers = {
        'referer': 'https://merchant.meituan.com/order/history?timeline=day&date=' + start_time[:10],
        'cookie': 'wmPoiId=' + cookies['wmPoiId'],
        'x-requested-with': 'XMLHttpRequest',
    }
    params = {
        'startTime': start_time,
        'endTime': str(end_time),
        'shopId': int(shop_id),
        'limit': limit,
        'offset': offset,
        'appKey': app_key,
        # Current time in milliseconds.
        'time': int(time.time() * 1000),
    }
    # The signature is computed before 'sign' itself is added to the params.
    params['sign'] = get_sign(params, app_secret)
    resp = requests.get(order_url, headers=headers, params=params, timeout=timeout)
    return resp.json()
def parse_orders(orders):
    """Flatten an order response into a DataFrame with one row per order.

    Args:
        orders: Mapping whose ``['data']['orders']`` entry is an iterable of
            order dicts. Each order supplies ``delivery_id``, ``status`` and
            a ``caidan`` list; only the first ``caidan`` element is read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``platform_order_id``,
        ``dinner_date``, ``dinner_time``, ``total``, ``actual_pay`` and
        ``order_status``.
    """
    def to_row(entry):
        order_id = entry['delivery_id']
        first_item = entry['caidan'][0]
        return {
            'platform_order_id': order_id,
            'dinner_date': first_item['day'],
            'dinner_time': first_item['time'],
            # 'totle_price' is the key spelling read from the response.
            'total': first_item['totle_price'],
            'actual_pay': first_item['real_price'],
            'order_status': entry['status'],
        }

    rows = [to_row(entry) for entry in orders['data']['orders']]
    return pd.DataFrame(rows)
def save_to_excel(df, file_name):
    """Write ``df`` to ``<file_name>.xlsx`` without the index column.

    Args:
        df: The ``pandas.DataFrame`` to export.
        file_name: Output path without extension; ``'.xlsx'`` is appended.
    """
    # ExcelWriter.save() was removed in pandas 2.0. The context manager saves
    # and closes the workbook on exit, including when to_excel raises.
    with pd.ExcelWriter(file_name + '.xlsx') as writer:
        df.to_excel(writer, index=False)
if __name__ == '__main__':
    # Every '...' below is a placeholder to be replaced with a real value.
    app_key = '...'
    app_secret = '...'
    shop_id = '111111'

    # Query window covering a single day.
    start_time = '2022-01-01 00:00:00'
    end_time = '2022-01-01 23:59:59'

    # Cookie mapping handed to get_orders.
    cookies = {
        'UM_distinctid': '...',
        '...': '...',
        'wmPoiId': '...',
        '...': '...',
    }

    # Fetch the orders, flatten them, then export to order_data.xlsx.
    file_name = 'order_data'
    orders = get_orders(
        start_time, end_time, shop_id, app_key, app_secret, cookies
    )
    df = parse_orders(orders)
    save_to_excel(df, file_name)