本文共 2758 字,大约阅读时间需要 9 分钟。
package com.ssm.jsoup.music;

import com.ssm.utils.FileUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes the album list of NetEase Cloud Music artists.
 *
 * <p>Reads one artist id per line from {@code g://singerId.json}, downloads the
 * artist's album page, and hands one JSON record per album
 * ({@code singer}, {@code albumName}, {@code albumId}) to
 * {@code FileUtils.saveConToFile} with the target path {@code g://singerAlbum.json}.
 */
public class WyyAlbum {

    /** Input file: one artist id per line. */
    private static final String SINGER_ID_FILE = "g://singerId.json";

    /** Output file path passed to {@code FileUtils.saveConToFile} for every album record. */
    private static final String ALBUM_OUTPUT_FILE = "g://singerAlbum.json";

    /** Album-list page of one artist; {@code %s} is the artist id. */
    private static final String ALBUM_URL_TEMPLATE =
            "https://music.163.com/artist/album?id=%s&limit=150&offset=0";

    /** Suffix of the page title that follows the artist name. */
    private static final String TITLE_SUFFIX = " - 网易云音乐";

    public static void main(String[] args) {
        readSingerAlbumData();
    }

    /**
     * Reads the artist ids from {@link #SINGER_ID_FILE} and scrapes the album page
     * of each one via {@link #getData(String)}.
     *
     * <p>I/O errors while reading the id file are reported on stderr and end the run.
     */
    private static void readSingerAlbumData() {
        // try-with-resources: the reader is closed even when reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(SINGER_ID_FILE)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String singerId = line.trim();
                if (singerId.isEmpty()) {
                    // A blank line would otherwise produce a request with an empty id.
                    continue;
                }
                getData(String.format(ALBUM_URL_TEMPLATE, singerId));
            }
        } catch (IOException e) {
            System.err.println("Failed to read singer ids from " + SINGER_ID_FILE);
            e.printStackTrace();
        }
    }

    /**
     * Downloads one artist album page and saves a JSON record for every album link found.
     *
     * <p>The artist name is taken from the page title with the site suffix removed;
     * album links are selected with {@code #m-song-module li a.s-fc0}. A failed download
     * is reported on stderr and does not propagate, so a caller iterating over many
     * artists can continue with the next one.
     *
     * @param url full URL of the artist's album-list page
     */
    public static void getData(String url) {
        try {
            // Each header is set individually: Connection.headers(...) expects a
            // Map<String, String>, not a list of "Name: value" strings.
            Connection connection = Jsoup.connect(url)
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36")
                    .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3")
                    // "br" is intentionally not advertised: nothing here decodes Brotli,
                    // so a Brotli-compressed response could not be parsed.
                    .header("Accept-Encoding", "gzip, deflate")
                    .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
                    // Placeholder value; replace with a real cookie before running.
                    .header("Cookie", "[此处应替换为实际cookie值]")
                    .header("Referer", "https://music.163.com/discover/artist/cat?id=1001&initial=65")
                    .header("Upgrade-Insecure-Requests", "1")
                    .timeout(200000);

            Document doc = connection.get();

            String singer = doc.title().replace(TITLE_SUFFIX, "");
            Elements albums = doc.select("#m-song-module li a.s-fc0");
            for (Element album : albums) {
                String albumName = album.text();
                String albumId = album.attr("href")
                        .replace("/album?id=", "")
                        .trim();
                // Values are escaped so that names containing quotes or backslashes
                // still produce a valid JSON record.
                String data = String.format(
                        "{\"singer\":\"%s\",\"albumName\":\"%s\",\"albumId\":\"%s\"}",
                        escapeJson(singer),
                        escapeJson(albumName),
                        escapeJson(albumId)
                );
                // NOTE(review): saveConToFile is a project helper; presumably it appends
                // the record to the file — confirm against FileUtils.
                FileUtils.saveConToFile(data, ALBUM_OUTPUT_FILE);
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch album page: " + url);
            e.printStackTrace();
        }
    }

    /**
     * Escapes a value for use inside a double-quoted JSON string.
     *
     * @param value raw text, must not be {@code null}
     * @return the text with {@code "}, {@code \} and control characters escaped
     */
    private static String escapeJson(String value) {
        StringBuilder escaped = new StringBuilder(value.length() + 8);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters must use the \\uXXXX form.
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
            }
        }
        return escaped.toString();
    }
}

以上代码经过以下优化:
以上代码可以直接用于实际爬取网易云音乐歌手专辑信息的任务,建议在实际使用前替换成真实的 Cookie 值,并确保有相应的权限和资源支持。
转载地址:http://klko.baihongyu.com/