updated script
parse html instead of just regex for complete file names
This commit is contained in:
182
main.ts
182
main.ts
@@ -1,13 +1,14 @@
|
||||
import { existsSync } from 'node:fs'
|
||||
import { mkdir, writeFile } from 'node:fs/promises'
|
||||
import path from 'node:path'
|
||||
import {argv} from 'node:process'
|
||||
import { argv } from 'node:process'
|
||||
import { Buffer } from "node:buffer"
|
||||
import * as cheerio from "https://esm.sh/cheerio?target=esnext"
|
||||
|
||||
// Album page URL: https://downloads.khinsider.com/game-soundtracks/album/<album>
// with exactly one path segment after "album/" and an optional trailing slash.
// Case-insensitive. fetchPlaylist rejects any URL that does not match.
const REGEX_PLAYLISTURL = /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/?$/i
|
||||
// Song page URL: same prefix as the album URL plus one more path segment
// (<album>/<song>), optional trailing slash, case-insensitive.
const REGEX_SONGURL = /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/[^\/]+\/?$/i
|
||||
// File path of the form /soundtracks/<a>/<b>/<c>; captures <a> (group 1) and
// <c> (group 2), which the regex-based download code decodes as folder and file name.
const REGEX_FILEPATHPARSE = /^\/soundtracks\/([^\/]+)\/[^\/]+\/([^\/]+)$/i
|
||||
// Global match over raw HTML: captures the href of the anchor that directly
// follows <td class="playlistDownloadSong">.
const REGEX_PLAYLIST_SONGHREF = /<td class="playlistDownloadSong"><a href="(.*?)">/gi
|
||||
// Captures an href ending in ".flac" on an anchor immediately followed by a
// <span> whose class attribute starts with "songDownloadLink".
const REGEX_SONG_FILEHREF = /<a\s+href=['"](.*?\.flac)['"]><span\s+class=['"]songDownloadLink/i
|
||||
// Applied to the page <title>: captures everything before " MP3 - Download"
// at the start of the string as the album name.
const REGEX_ALBUMTITLE = /^(.*?) MP3 - Download/i
|
||||
// Matches every character that is NOT a letter, digit, space, or one of
// - _ = + , . ( ) [ ] { } — pathFor removes these from album and song names.
const REGEX_UNSAFEFORFILE = /[^a-z0-9\-_=+,.()\[\]{} ]/gi
|
||||
|
||||
async function main() {
|
||||
const playlistURL = argv[2]
|
||||
@@ -25,23 +26,129 @@ async function main() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Downloads an album page and returns one entry per song in its song-list table.
 *
 * The album name is taken from the page `<title>` via REGEX_ALBUMTITLE; the
 * song rows are read with getPlaylistTableHeaders / getPlaylistRows.
 *
 * @param url Album page URL; must match REGEX_PLAYLISTURL.
 * @returns The parsed song rows (page URL, album, name, track and CD numbers).
 * @throws A string message when the URL is not accepted, the HTTP request does
 *         not succeed, or the album name cannot be read from the page title.
 */
async function fetchPlaylist(url: string): Promise<PlaylistSongData[]> {
    if (!REGEX_PLAYLISTURL.test(url)) {
        throw `unaccepted url ${url}`
    }

    const resp = await fetch(url)
    // Fail early on HTTP errors instead of trying to parse an error page.
    if (!resp.ok) {
        throw `unable to fetch ${url}: ${resp.status} ${resp.statusText}`
    }
    const text = await resp.text()

    const $ = cheerio.load(text)

    const title = $.extract({
        title: 'title'
    }).title ?? ''
    const titleMatch = REGEX_ALBUMTITLE.exec(title)
    if (titleMatch === null) {
        throw `unable to grab album name from ${title}`
    }
    const albumName = titleMatch[1]

    const columns = getPlaylistTableHeaders($)
    const rows = getPlaylistRows($, columns, albumName)

    console.log(`downloading ${rows.length} songs`)

    return rows
}
|
||||
|
||||
async function downloadSong(url: string, location: string) {
|
||||
/**
 * Positions of the columns of interest within the song-list table.
 *
 * Each field is the zero-based index of the matching header cell, as found by
 * getPlaylistTableHeaders, or -1 when no header cell matched. The same index
 * is later used to pick the corresponding cell out of each song row.
 */
interface PlaylistTableColumns {
|
||||
/** Index of the "song name" column. */
name: number
|
||||
/** Index of the "#" (track number) column, or -1 if absent. */
track: number
|
||||
/** Index of the "cd" column, or -1 if absent. */
cd: number
|
||||
}
|
||||
/**
 * One song row scraped from an album page.
 */
interface PlaylistSongData {
|
||||
/**
 * href of the link inside the row's `td.playlistDownloadSong` cell, or '' when
 * none was found. May be site-relative; downloadSong prefixes the host when
 * the value does not start with "http".
 */
url: string
|
||||
/** Album name taken from the album page title. */
album: string
|
||||
/** Text of the row's song-name cell. */
name: string
|
||||
/** Track number parsed from the row, or -1 when missing or not numeric. */
track: number
|
||||
/** CD number parsed from the row, or -1 when missing or not numeric. */
cd: number
|
||||
}
|
||||
|
||||
/**
 * Finds which columns of the song-list table hold the CD number, the track
 * number and the song name by inspecting the header row's `<th>` cells.
 *
 * Header text is compared after collapsing whitespace, trimming and
 * lower-casing, so formatting in the page markup (newlines, padding,
 * non-breaking spaces, different letter case) does not defeat the match.
 *
 * @param $ Cheerio document for the album page.
 * @returns Zero-based index of each column; -1 for "cd" / "#" when absent.
 * @throws The string 'unable to find song title column' when no "song name"
 *         header exists, since rows cannot be interpreted without it.
 */
function getPlaylistTableHeaders($: cheerio.CheerioAPI): PlaylistTableColumns {
    const header = $('#songlist tr#songlist_header')
    const headerCells = header.extract({
        cells: ['th']
    })

    const indexes: PlaylistTableColumns = {
        name: -1,
        track: -1,
        cd: -1,
    }

    headerCells.cells.forEach((cell, i) => {
        // toLowerCase (not toLocaleLowerCase): the labels are fixed ASCII
        // strings, so the comparison must not depend on the host locale.
        const label = cell.replace(/\s+/g, ' ').trim().toLowerCase()
        switch (label) {
            case 'cd':
                indexes.cd = i
                break
            case '#':
                indexes.track = i
                break
            case 'song name':
                indexes.name = i
                break
        }
    })

    if (indexes.name === -1) {
        throw 'unable to find song title column'
    }

    return indexes
}
|
||||
|
||||
/**
 * Reads every song row of the song-list table (all `<tr>` except the header
 * and footer rows) into a PlaylistSongData entry.
 *
 * @param $ Cheerio document for the album page.
 * @param columns Column indexes as returned by getPlaylistTableHeaders.
 * @param albumName Album name stored on every returned entry.
 * @returns One entry per row, in document order. `url` is '' when the row has
 *          no download link; `track` / `cd` are -1 when the column is absent
 *          or the cell is not numeric.
 * @throws A string message when a row has no (non-blank) song-name cell.
 */
function getPlaylistRows($: cheerio.CheerioAPI, columns: PlaylistTableColumns, albumName: string): PlaylistSongData[] {
    // Parses the integer in cells[index]; -1 when the column is absent
    // (index < 0), the cell is missing, or its text is not a number.
    const numberAt = (cells: string[], index: number): number => {
        if (index < 0) {
            return -1
        }
        const parsed = Number.parseInt(cells[index] ?? '', 10)
        return Number.isNaN(parsed) ? -1 : parsed
    }

    const rows = $('#songlist tr:not(#songlist_header):not(#songlist_footer)')
    const rowsData: PlaylistSongData[] = []

    rows.each((rowIndex, rowEl) => {
        // Query the row through the existing document (as the header helper
        // does) rather than loading each row element as its own document.
        const { url, cells } = $(rowEl).extract({
            url: {
                selector: 'td.playlistDownloadSong a',
                value: 'href'
            },
            cells: ['td']
        })

        // Trim so markup whitespace does not end up in file names.
        const name = cells[columns.name]?.trim()
        if (!name) {
            throw `unable to grab song name from ${albumName} in row ${rowIndex} (column ${columns.name}) - ${cells}`
        }

        rowsData.push({
            url: url ?? '',
            album: albumName,
            name,
            track: numberAt(cells, columns.track),
            cd: numberAt(cells, columns.cd),
        })
    })

    return rowsData
}
|
||||
|
||||
async function downloadSong(song: PlaylistSongData, location: string) {
|
||||
let url = song.url
|
||||
if (!/^http/i.test(url)) {
|
||||
url = 'https://downloads.khinsider.com' + url
|
||||
}
|
||||
@@ -49,24 +156,22 @@ async function downloadSong(url: string, location: string) {
|
||||
throw `unaccepted url ${url}`
|
||||
}
|
||||
|
||||
let resp = await fetch(url)
|
||||
let text = await resp.text()
|
||||
let match = text.match(REGEX_SONG_FILEHREF)
|
||||
if (!match) {
|
||||
const resp = await fetch(url)
|
||||
const text = await resp.text()
|
||||
|
||||
const $ = cheerio.load(text)
|
||||
const flacUrl = $.extract({
|
||||
url: {
|
||||
selector: '#pageContent a[href*="flac"]',
|
||||
value: 'href'
|
||||
}
|
||||
}).url
|
||||
|
||||
if (!flacUrl) {
|
||||
throw `can't find download link for ${url}`
|
||||
}
|
||||
|
||||
|
||||
const songurl = new URL(match[1])
|
||||
const songurlmatch = REGEX_FILEPATHPARSE.exec(songurl.pathname)
|
||||
if (!songurlmatch) {
|
||||
throw `can't find folder and filename for ${songurl}`
|
||||
}
|
||||
|
||||
const foldername = decodeURIComponent(songurlmatch[1])
|
||||
const filename = decodeURIComponent(songurlmatch[2])
|
||||
const pathname = path.resolve(location, foldername)
|
||||
const fullpathname = path.resolve(pathname, filename)
|
||||
const { pathname, fullpathname } = pathFor(location, song)
|
||||
|
||||
if (!existsSync(pathname)) {
|
||||
await mkdir(pathname)
|
||||
@@ -76,13 +181,34 @@ async function downloadSong(url: string, location: string) {
|
||||
}
|
||||
|
||||
console.log(`downloading ${fullpathname}`)
|
||||
console.log(` from ${flacUrl}`)
|
||||
|
||||
const songresp = await fetch(songurl)
|
||||
const songresp = await fetch(flacUrl)
|
||||
const songblob = await songresp.arrayBuffer()
|
||||
|
||||
return writeFile(fullpathname, toBuffer(songblob))
|
||||
}
|
||||
|
||||
/**
 * Where a downloaded song is stored on disk, as computed by pathFor.
 */
interface SongPath {
|
||||
/** Absolute path of the album directory (created by the caller if missing). */
pathname: string
|
||||
/** Absolute path of the song file inside `pathname`. */
fullpathname: string
|
||||
}
|
||||
|
||||
/**
 * Computes the album directory and file path for a song.
 *
 * Album and song names come from scraped HTML, so they are stripped of every
 * character matched by REGEX_UNSAFEFORFILE and trimmed. A name that ends up
 * empty or consisting only of dots is replaced by a placeholder: otherwise an
 * album called ".." would resolve to the parent of `location`, and an empty
 * one to `location` itself.
 *
 * The file name is `<cd>.<track>. <name>.flac`, where the cd and track parts
 * are only present when the respective number is >= 0.
 *
 * @param location Base download directory.
 * @param song Song metadata (album, name, cd and track are used).
 * @returns Absolute album directory and absolute file path inside it.
 */
function pathFor(location: string, song: PlaylistSongData): SongPath {
    // Sanitizes one path component; falls back when nothing usable is left.
    const sanitize = (raw: string, fallback: string): string => {
        const cleaned = raw.replace(REGEX_UNSAFEFORFILE, '').trim()
        return /^\.*$/.test(cleaned) ? fallback : cleaned
    }

    const albumname = sanitize(song.album, 'Unknown Album')
    const songname = sanitize(song.name, 'Unknown Song')

    // "1.2. " / "2. " / "" depending on which numbers are known.
    const numbering = [song.cd, song.track]
        .filter(n => n >= 0)
        .map(n => `${n}.`)
        .join('')
    const prefix = numbering === '' ? '' : `${numbering} `
    const filename = `${prefix}${songname}.flac`

    const pathname = path.resolve(location, albumname)
    const fullpathname = path.resolve(pathname, filename)

    return {
        pathname,
        fullpathname
    }
}
|
||||
|
||||
function toBuffer(arrayBuffer: ArrayBuffer) {
|
||||
const buffer = Buffer.alloc(arrayBuffer.byteLength);
|
||||
const view = new Uint8Array(arrayBuffer);
|
||||
|
||||
Reference in New Issue
Block a user