Python网络爬虫4—网易云歌曲爬虫


import random,re,os,time
import requests

def downloader(url, name):
    """Stream one audio file from ``url`` and save it as ``<name>.mp3``.

    The file is written under ``./蔡徐坤动听音乐/`` (created on demand) and a
    progress line is printed while the body is streamed. Nothing is fetched
    when the target file already exists. Failures are reported on stdout
    instead of being raised, so a batch caller can move on to the next song.

    Args:
        url: URL of the audio resource to download.
        name: Song title used as the file name. Path separators and the
            characters ``: * ? " < > |`` are replaced with ``_`` because
            they are not valid in file names on common platforms.

    Returns:
        None.
    """
    chunk_size = 1024  # bytes requested per streamed chunk
    root = './蔡徐坤动听音乐/'
    # Titles come from scraped HTML; an unsanitised "/" or "?" would make the
    # open() below fail (or point outside the download folder).
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
    path = root + safe_name + '.mp3'
    # Stream into a temporary file first so an interrupted download is never
    # mistaken for a finished one by the "already exists" check.
    part_path = path + '.part'
    try:
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print('文件已存在')
            return
        # `headers` is the module-level request-header dict.
        with requests.get(url, stream=True, headers=headers, timeout=30) as res:
            # Check the status before touching the body or its headers.
            if res.status_code != 200:
                print(f'爬取失败: HTTP {res.status_code}')
                return
            # The header may be absent (e.g. chunked transfer); 0 = unknown.
            content_size = int(res.headers.get('content-length', 0))
            if content_size:
                # Convert bytes to MB for display.
                print('[文件大小]:%0.2f MB' % (content_size / 1024 / 1024))
            print("利神超极速爬虫器正在疯狂下载!!!".center(50, '-'))
            size = 0  # bytes written so far
            with open(part_path, 'wb') as f:
                for data in res.iter_content(chunk_size=chunk_size):
                    f.write(data)
                    size += len(data)
                    if content_size:
                        print("\r[下载进度]:{}{:.2f}%".format(
                            '>' * int(size * 50 / content_size),
                            size * 100 / content_size), end='')
                    else:
                        # Total size unknown: show the running byte count.
                        print("\r[下载进度]:{} bytes".format(size), end='')
        # Publish the finished file under its final name.
        os.replace(part_path, path)
        print(f'\n下载成功,文件自动保存在当前目录{path}')
    except (requests.RequestException, OSError, ValueError) as err:
        print(f'爬取失败: {err}')
        # Best-effort cleanup of the partial file.
        try:
            os.remove(part_path)
        except OSError:
            pass

# Disguise the request headers: a pool of browser User-Agent strings from
# which one entry is picked at random when the module is loaded.
user_agent = [
    "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30",
    "Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C)",
    "Opera/9.80 (Windows NT 5.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C)",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
]

# Header dict passed to every requests call in this script.
headers = {}
headers["User-Agent"] = random.choice(user_agent)

# Fetch the artist page. The timeout keeps the script from hanging forever
# on a stalled connection.
url = 'https://music.163.com/artist?id=12932368'
res = requests.get(url, headers=headers, timeout=30)

# Extract song id and title from anchors such as
# <a href="/song?id=1360512113">记得</a>.
# A raw string keeps `\?` and `\d` as regex escapes instead of invalid string
# escapes, and capturing both groups in one scan guarantees that ids[i] and
# names[i] always come from the same anchor.
pattern = re.compile(r'<a href="/song\?id=(\d+)">(.*?)</a>')
songs = pattern.findall(res.text)
ids = [song_id for song_id, _ in songs]
names = [title for _, title in songs]

# Download every song found on the artist page.
# The loop variable is `song_id` rather than `id` so the builtin id() is not
# rebound at module level.
for song_id, name in zip(ids, names):
    # Media URL for this song, built from its numeric id.
    url = f'https://music.163.com/song/media/outer/url?id={song_id}'
    downloader(url, name)

# Keep the console window open until the user confirms.
input('爬虫完毕,不要问谁开发的,深藏功与名,按任意键退出')

文章作者: 彭韦浩
版权声明: 本博客所有文章除特别声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 彭韦浩 !
  目录