222 lines
5.3 KiB
TypeScript
222 lines
5.3 KiB
TypeScript
import { existsSync } from 'node:fs'
|
|
import { mkdir, writeFile } from 'node:fs/promises'
|
|
import path from 'node:path'
|
|
import { argv } from 'node:process'
|
|
import { Buffer } from "node:buffer"
|
|
import * as cheerio from "https://esm.sh/cheerio?target=esnext"
|
|
|
|
// Accepts only album pages: exactly one path segment after `/album/`, with an
// optional trailing slash. Case-insensitive. Checked by fetchPlaylist before fetching.
const REGEX_PLAYLISTURL = /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/?$/i
|
|
// Accepts only song pages: exactly two path segments after `/album/`
// (album, then song), with an optional trailing slash. Case-insensitive.
const REGEX_SONGURL = /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/[^\/]+\/?$/i
|
|
// Applied to the page <title> text: capture group 1 is the (lazily matched)
// text that precedes " MP3 - Download", which is used as the album name.
const REGEX_ALBUMTITLE = /^(.*?) MP3 - Download/i
|
|
// Matches every character that is NOT a letter, digit, space, or one of
// - _ = + , . ( ) [ ] { }  — pathFor strips these from album and song names.
const REGEX_UNSAFEFORFILE = /[^a-z0-9\-_=+,.()\[\]{} ]/gi
|
|
|
|
/**
 * Command-line entry point: scrapes an album page and downloads every song.
 *
 * Arguments are read from `argv`:
 * - `argv[2]`: album (playlist) URL — required.
 * - `argv[3]`: directory to download into; defaults to `'.'`.
 * - `argv[4]`: pass `sync` (any casing) to download songs one at a time;
 *   otherwise all downloads are started together via `Promise.all`.
 *
 * @throws Error when no album URL is supplied. Anything thrown by
 *   `fetchPlaylist` or `downloadSong` propagates to the caller.
 */
async function main(): Promise<void> {
  const playlistURL = argv[2]
  if (!playlistURL) {
    // Fail with a usage hint instead of letting `undefined` reach the URL check.
    throw new Error('missing album url; usage: <album-url> [download-path] [sync]')
  }

  // `||` rather than `??` so an empty-string argument also falls back to '.'.
  const downloadPath = argv[3] || '.'
  const runSync = argv[4]?.toLowerCase() === 'sync'

  const playlist = await fetchPlaylist(playlistURL)

  if (runSync) {
    // Sequential mode: wait for each song before starting the next one.
    for (const song of playlist) {
      await downloadSong(song, downloadPath)
    }
  } else {
    // Parallel mode: the first rejected download rejects the whole batch.
    await Promise.all(playlist.map(song => downloadSong(song, downloadPath)))
  }
}
|
|
|
|
/**
 * Fetches an album page and extracts the list of songs it links to.
 *
 * @param url Album page URL; must match `REGEX_PLAYLISTURL`.
 * @returns One entry per song row found in the page's `#songlist` table.
 * @throws Error if the URL is not an accepted album URL, the HTTP request does
 *   not succeed, or the album name cannot be read from the page title. Errors
 *   from `getPlaylistTableHeaders` / `getPlaylistRows` propagate unchanged.
 */
async function fetchPlaylist(url: string): Promise<PlaylistSongData[]> {
  if (!REGEX_PLAYLISTURL.test(url)) {
    throw new Error(`unaccepted url ${url}`)
  }

  const resp = await fetch(url)
  if (!resp.ok) {
    // Without this check an error page would be parsed and fail later with a
    // misleading "unable to grab album name" message.
    throw new Error(`unable to fetch ${url}: ${resp.status} ${resp.statusText}`)
  }
  const $ = cheerio.load(await resp.text())

  // The album name is whatever precedes " MP3 - Download" in the <title>.
  const title = $.extract({ title: 'title' }).title ?? ''
  const titleMatch = REGEX_ALBUMTITLE.exec(title)
  if (titleMatch === null) {
    throw new Error(`unable to grab album name from ${title}`)
  }
  const albumName = titleMatch[1]

  const columns = getPlaylistTableHeaders($)
  const rows = getPlaylistRows($, columns, albumName)

  console.log(`downloading ${rows.length} songs`)

  return rows
}
|
|
|
|
/**
 * Zero-based positions of the interesting columns in the `#songlist` table,
 * as located from its header row. A value of `-1` means the column was not found.
 */
interface PlaylistTableColumns {
  /** Index of the "Song Name" column. */
  name: number;
  /** Index of the "#" (track number) column. */
  track: number;
  /** Index of the "CD" (disc number) column. */
  cd: number;
}
|
|
/** Everything scraped about one song row of an album page. */
interface PlaylistSongData {
  /** `href` of the row's download link; may be host-relative (downloadSong prefixes the host). */
  url: string;
  /** Album name taken from the page title; shared by every row of the page. */
  album: string;
  /** Text of the row's song-name cell. */
  name: string;
  /** Track number, or `-1` when the column is missing or not numeric. */
  track: number;
  /** Disc number, or `-1` when the column is missing or not numeric. */
  cd: number;
}
|
|
|
|
/**
 * Locates the "CD", "#" and "Song Name" columns in the header row of the
 * `#songlist` table.
 *
 * @param $ Cheerio API loaded with the album page.
 * @returns The zero-based index of each column; `track` and `cd` stay `-1`
 *   when the page has no such column.
 * @throws Error if no "Song Name" column is present.
 */
function getPlaylistTableHeaders($: cheerio.CheerioAPI): PlaylistTableColumns {
  const { cells } = $('#songlist tr#songlist_header').extract({
    cells: ['th']
  })

  const indexes: PlaylistTableColumns = { name: -1, track: -1, cd: -1 }

  cells.forEach((cell, i) => {
    // Header text is compared trimmed and lower-cased (locale-independent,
    // since the labels are plain ASCII) so stray whitespace does not hide a column.
    switch (cell.trim().toLowerCase()) {
      case 'cd':
        indexes.cd = i
        break
      case '#':
        indexes.track = i
        break
      case 'song name':
        indexes.name = i
        break
    }
  })

  if (indexes.name === -1) {
    throw new Error('unable to find song title column')
  }

  return indexes
}
|
|
|
|
/**
 * Reads the numeric value of one table cell.
 *
 * @returns The parsed base-10 integer, or `-1` when the column is absent
 *   (`index < 0`), the cell is missing, or its text is not a number.
 */
function parseColumnNumber(cells: string[], index: number): number {
  if (index < 0) {
    return -1
  }
  const parsed = Number.parseInt(cells[index] ?? '', 10)
  return Number.isNaN(parsed) ? -1 : parsed
}

/**
 * Converts every data row of the `#songlist` table (header and footer rows
 * excluded) into a `PlaylistSongData` record.
 *
 * @param $ Cheerio API loaded with the album page.
 * @param columns Column positions as returned by `getPlaylistTableHeaders`.
 * @param albumName Album name stored on every returned record.
 * @returns One record per row, in document order.
 * @throws Error if a row has no text in the song-name column.
 */
function getPlaylistRows($: cheerio.CheerioAPI, columns: PlaylistTableColumns, albumName: string): PlaylistSongData[] {
  const rows = $('#songlist tr:not(#songlist_header):not(#songlist_footer)')

  return rows.toArray().map((rowEl, rowIndex) => {
    // Scope the existing document to this row instead of loading the element
    // into a brand-new cheerio document for every row.
    const row = $(rowEl)

    const { url, cells } = row.extract({
      url: {
        selector: 'td.playlistDownloadSong a',
        value: 'href'
      },
      cells: ['td']
    })

    const name = cells[columns.name]
    if (!name) {
      throw new Error(
        `unable to grab song name from ${albumName} in row ${rowIndex} (column ${columns.name}) - ${cells.join(',')}`
      )
    }

    return {
      url: url ?? '',
      album: albumName,
      name,
      track: parseColumnNumber(cells, columns.track),
      cd: parseColumnNumber(cells, columns.cd),
    }
  })
}
|
|
|
|
/**
 * Downloads the FLAC file of one song into `<location>/<album>/`.
 *
 * The song page is fetched to discover the FLAC link, then the file itself is
 * fetched and written to the path computed by `pathFor`. Songs whose target
 * file already exists are skipped without any network request.
 *
 * @param song Song record scraped from the album page.
 * @param location Base directory under which the album folder is created.
 * @throws Error if the song URL is not an accepted song URL, an HTTP request
 *   does not succeed, or the song page has no FLAC link. File-system errors
 *   from `mkdir` / `writeFile` propagate unchanged.
 */
async function downloadSong(song: PlaylistSongData, location: string): Promise<void> {
  let url = song.url
  if (!/^http/i.test(url)) {
    // Links scraped from the album page may be host-relative.
    url = 'https://downloads.khinsider.com' + url
  }
  if (!REGEX_SONGURL.test(url)) {
    throw new Error(`unaccepted url ${url}`)
  }

  const { pathname, fullpathname } = pathFor(location, song)

  if (existsSync(fullpathname)) {
    // Actually skip: previously this only logged and then re-downloaded the file.
    console.log(`skipping file already exists ${fullpathname}`)
    return
  }

  const resp = await fetch(url)
  if (!resp.ok) {
    throw new Error(`unable to fetch ${url}: ${resp.status} ${resp.statusText}`)
  }
  const $ = cheerio.load(await resp.text())

  const flacUrl = $.extract({
    url: {
      selector: '#pageContent a[href*="flac"]',
      value: 'href'
    }
  }).url

  if (!flacUrl) {
    throw new Error(`can't find download link for ${url}`)
  }

  // `recursive: true` makes this a no-op when the directory already exists, so
  // concurrent downloads of the same album cannot race into EEXIST, and a
  // missing `location` directory is created as well.
  await mkdir(pathname, { recursive: true })

  console.log(`downloading ${fullpathname}`)
  console.log(` from ${flacUrl}`)

  const songresp = await fetch(flacUrl)
  if (!songresp.ok) {
    // Never write an HTTP error body to disk as if it were audio.
    throw new Error(`unable to download ${flacUrl}: ${songresp.status} ${songresp.statusText}`)
  }
  const songblob = await songresp.arrayBuffer()

  await writeFile(fullpathname, toBuffer(songblob))
}
|
|
|
|
/** Resolved file-system locations for one song, as produced by `pathFor`. */
interface SongPath {
  /** Absolute path of the album directory the song is stored in. */
  pathname: string;
  /** Absolute path of the song file inside `pathname`. */
  fullpathname: string;
}
|
|
|
|
/**
 * Computes where a song is stored on disk.
 *
 * Album and song names have every character matched by `REGEX_UNSAFEFORFILE`
 * removed. The file name is `<cd>.<track>. <song>.flac`, where the `<cd>.` and
 * `<track>.` parts appear only for non-negative values and the separating
 * space only when at least one of them is present.
 *
 * @param location Base directory; resolved to an absolute path.
 * @param song Song whose album, name, cd and track determine the path.
 * @returns The album directory and the full file path inside it.
 */
function pathFor(location: string, song: PlaylistSongData): SongPath {
  const safeAlbum = song.album.replace(REGEX_UNSAFEFORFILE, '')
  const safeSong = song.name.replace(REGEX_UNSAFEFORFILE, '')

  // Build the optional "<cd>.<track>. " prefix piece by piece.
  let prefix = ''
  if (song.cd >= 0) {
    prefix += `${song.cd}.`
  }
  if (song.track >= 0) {
    prefix += `${song.track}.`
  }
  if (prefix !== '') {
    prefix += ' '
  }

  const albumDir = path.resolve(location, safeAlbum)
  const songFile = path.resolve(albumDir, `${prefix}${safeSong}.flac`)

  return { pathname: albumDir, fullpathname: songFile }
}
|
|
|
|
/**
 * Copies the bytes of an `ArrayBuffer` into a new Node.js `Buffer`.
 *
 * @param arrayBuffer Source bytes.
 * @returns A `Buffer` of the same length holding a copy of the data; later
 *   changes to `arrayBuffer` do not affect it.
 */
function toBuffer(arrayBuffer: ArrayBuffer): Buffer {
  // Buffer.from(<typed array>) copies the contents, whereas passing the
  // ArrayBuffer directly would create a view sharing the same memory.
  return Buffer.from(new Uint8Array(arrayBuffer));
}
|
|
|
|
// Run the script; any rejection from main() is printed with console.error
// rather than left as an unhandled promise rejection.
main().catch(e => console.error(e))
|