Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 86 additions & 23 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,44 @@ import queueMicrotask from 'queue-microtask'
/**
* Parse a torrent identifier (magnet uri, .torrent file, info hash)
* @param {string|ArrayBufferView|Object} torrentId
* @param {Object} options
* @param {string} options.hashMode - 'v1', 'v2', or 'both' (default: 'v1')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this a thing? can't it simply detect if it's a V1 V2 or both? is this really required?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think we would need to detect it - the caller wouldn't know in advance what version a torrent is at the time options are passed.

It's a three-way switch, most reliably detected (in my experience) by checking for (length || files) for v1, file tree for v2, and both for hybrid. (Checking the meta version can be iffy and doesn't disambiguate hybrid from v2-only) :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ThaUnknown I considered this. I suppose if the user only supplies an infoHash without infoHashV2, a btih without a btmh, or a torrent file with length or files without a file tree, then assume v1. Same for vice-versa.

Applied this change in f34b051

* @return {Object}
*/
async function parseTorrent (torrentId) {
async function parseTorrent (torrentId, options = {}) {
if (typeof torrentId === 'string' && /^(stream-)?magnet:/.test(torrentId)) {
// if magnet uri (string)
const torrentObj = magnet(torrentId)

// infoHash won't be defined if a non-bittorrent magnet is passed
if (!torrentObj.infoHash) {
// infoHash (v1) or infoHashV2 (v2) won't be defined if a non-bittorrent magnet is passed
if (!torrentObj.infoHash && !torrentObj.infoHashV2) {
throw new Error('Invalid torrent identifier')
}

return torrentObj
} else if (typeof torrentId === 'string' && (/^[a-f0-9]{40}$/i.test(torrentId) || /^[a-z2-7]{32}$/i.test(torrentId))) {
// if info hash (hex/base-32 string)
// if info hash v1 (hex/base-32 string)
return magnet(`magnet:?xt=urn:btih:${torrentId}`)
} else if (typeof torrentId === 'string' && /^[a-f0-9]{64}$/i.test(torrentId)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hate this double if thing

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dbc9501

Is this better?

// if info hash v2 (hex string)
return magnet(`magnet:?xt=urn:btmh:1220${torrentId}`)
} else if (ArrayBuffer.isView(torrentId) && torrentId.length === 20) {
// if info hash (buffer)
// if info hash v1 (buffer)
return magnet(`magnet:?xt=urn:btih:${arr2hex(torrentId)}`)
} else if (ArrayBuffer.isView(torrentId) && torrentId.length === 32) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

20374a9

Good call. Redundant. This better?

// if info hash v2 (buffer)
return magnet(`magnet:?xt=urn:btmh:1220${arr2hex(torrentId)}`)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not exactly sure how the torrentId field is used downstream. I see the ability to specify a torrent by SHA1 hash or by SHA256 hash, but it's unclear how we would pass both in the case of a hybrid torrent here. Would we just treat both swarms as separate? It is legal to have both btih and btmh in a magnet (and that is how hybrid magnets usually work).

} else if (ArrayBuffer.isView(torrentId)) {
// if .torrent file (buffer)
return await decodeTorrentFile(torrentId) // might throw
} else if (torrentId && torrentId.infoHash) {
return await decodeTorrentFile(torrentId, options) // might throw
} else if (torrentId && (torrentId.infoHash || torrentId.infoHashV2)) {
// if parsed torrent (from `parse-torrent` or `magnet-uri`)
torrentId.infoHash = torrentId.infoHash.toLowerCase()
if (torrentId.infoHash) {
torrentId.infoHash = torrentId.infoHash.toLowerCase()
}
if (torrentId.infoHashV2) {
torrentId.infoHashV2 = torrentId.infoHashV2.toLowerCase()
}

if (!torrentId.announce) torrentId.announce = []

Expand All @@ -57,13 +70,13 @@ async function parseTorrentRemote (torrentId, opts, cb) {

let parsedTorrent
try {
parsedTorrent = await parseTorrent(torrentId)
parsedTorrent = await parseTorrent(torrentId, opts)
} catch (err) {
// If torrent fails to parse, it could be a Blob, http/https URL or
// filesystem path, so don't consider it an error yet.
}

if (parsedTorrent && parsedTorrent.infoHash) {
if (parsedTorrent && (parsedTorrent.infoHash || parsedTorrent.infoHashV2)) {
queueMicrotask(() => {
cb(null, parsedTorrent)
})
Expand Down Expand Up @@ -100,21 +113,23 @@ async function parseTorrentRemote (torrentId, opts, cb) {

async function parseOrThrow (torrentBuf) {
try {
parsedTorrent = await parseTorrent(torrentBuf)
parsedTorrent = await parseTorrent(torrentBuf, opts)
} catch (err) {
return cb(err)
}
if (parsedTorrent && parsedTorrent.infoHash) cb(null, parsedTorrent)
if (parsedTorrent && (parsedTorrent.infoHash || parsedTorrent.infoHashV2)) cb(null, parsedTorrent)
else cb(new Error('Invalid torrent identifier'))
}
}

/**
* Parse a torrent. Throws an exception if the torrent is missing required fields.
* @param {ArrayBufferView|Object} torrent
* @param {Object} options
* @param {string} options.hashMode - 'v1', 'v2', or 'both' (default: 'v1')
* @return {Object} parsed torrent
*/
async function decodeTorrentFile (torrent) {
async function decodeTorrentFile (torrent, options = {}) {
if (ArrayBuffer.isView(torrent)) {
torrent = bencode.decode(torrent)
}
Expand All @@ -123,15 +138,24 @@ async function decodeTorrentFile (torrent) {
ensure(torrent.info, 'info')
ensure(torrent.info['name.utf-8'] || torrent.info.name, 'info.name')
ensure(torrent.info['piece length'], 'info[\'piece length\']')
ensure(torrent.info.pieces, 'info.pieces')

if (torrent.info.files) {
torrent.info.files.forEach(file => {
ensure(typeof file.length === 'number', 'info.files[0].length')
ensure(file['path.utf-8'] || file.path, 'info.files[0].path')
})
const isV2 = torrent.info['meta version'] === 2

if (isV2) {
// BitTorrent v2 specific validation
ensure(torrent.info['file tree'], 'info[\'file tree\']')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably also check for presence of piece layers here too :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great call. 5577ce6

} else {
ensure(typeof torrent.info.length === 'number', 'info.length')
// BitTorrent v1 validation

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am reading on phone so not sure if I'm getting the control flow right, but is this v1 validation leg exclusive with v2? We would want to do both in the case of hybrid torrents.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops. Right. v2 Torrents would have had their v1 validation skipped. Fixed: 301c07b

ensure(torrent.info.pieces, 'info.pieces')

if (torrent.info.files) {
torrent.info.files.forEach(file => {
ensure(typeof file.length === 'number', 'info.files[0].length')
ensure(file['path.utf-8'] || file.path, 'info.files[0].path')
})
} else {
ensure(typeof torrent.info.length === 'number', 'info.length')
}
}

const result = {
Expand All @@ -141,8 +165,18 @@ async function decodeTorrentFile (torrent) {
announce: []
}

result.infoHashBuffer = await hash(result.infoBuffer)
result.infoHash = arr2hex(result.infoHashBuffer)
// Generate hashes based on user preference
const { hashMode = 'v1' } = options

if (hashMode === 'v1' || hashMode === 'both') {
result.infoHashBuffer = await hash(result.infoBuffer)
result.infoHash = arr2hex(result.infoHashBuffer)
}

if (hashMode === 'v2' || hashMode === 'both') {
result.infoHashV2Buffer = await hash(result.infoBuffer, undefined, 'sha-256')
result.infoHashV2 = arr2hex(result.infoHashV2Buffer)
}

if (torrent.info.private !== undefined) result.private = !!torrent.info.private

Expand Down Expand Up @@ -175,6 +209,28 @@ async function decodeTorrentFile (torrent) {
result.announce = Array.from(new Set(result.announce))
result.urlList = Array.from(new Set(result.urlList))

// Process files (simplified to use same logic for v1 and v2)
if (isV2 && torrent.info['file tree']) {
// Convert v2 file tree to v1-style files array for consistent processing
const files = []
/**
 * Recursively flatten a BitTorrent v2 `file tree` into v1-style file entries,
 * appending each one to the enclosing `files` array.
 *
 * In a v2 file tree the dictionary keys are path elements, and the entry with
 * a zero-length key ('') holds the properties (such as `length`) of the path
 * composed so far. The '' key is therefore a marker, not a path element, and
 * must not be appended to the path.
 *
 * @param {Object} tree - (sub)tree of the `file tree` dictionary
 * @param {Array<string>} currentPath - path elements leading to `tree`
 */
function processFileTree (tree, currentPath = []) {
  for (const [name, entry] of Object.entries(tree)) {
    if (name === '') {
      // Spec-shaped leaf: `entry` is the properties dict of `currentPath`
      files.push({
        'path.utf-8': currentPath,
        length: entry.length
      })
      continue
    }

    const fullPath = [...currentPath, name]
    // Fallback for the flat shape previously accepted here, where the length
    // sits directly on the named entry. Requiring a numeric length keeps a
    // directory that merely contains an entry called "length" from being
    // mistaken for a file.
    const isFlatLeaf = entry !== null && typeof entry === 'object' &&
      typeof entry.length === 'number' && !('' in entry)

    if (isFlatLeaf) {
      files.push({
        'path.utf-8': fullPath,
        length: entry.length
      })
    } else {
      processFileTree(entry, fullPath)
    }
  }
}
processFileTree(torrent.info['file tree'])
torrent.info.files = files

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. Flattening the tree for easier processing is the right call, but it seems like we may be implicitly making a hybrid torrent here? Adding files without pieces would look like an (invalid) torrent file to some consumers. Thinking about what we might need to make piece validation easier downstream, and some method for flattening the file tree to get the path, length, and piece root would be good, but I think we may not want to assign it to the v1 files key

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right! We'd accidentally be making invalid hybrid torrents when someone feeds in a v2-only one. Fixed: f147872

}

// Use unified file processing logic
const files = torrent.info.files || [torrent.info]
result.files = files.map((file, i) => {
const parts = [].concat(result.name, file['path.utf-8'] || file.path || []).map(p => ArrayBuffer.isView(p) ? arr2text(p) : p)
Expand All @@ -192,7 +248,14 @@ async function decodeTorrentFile (torrent) {

result.pieceLength = torrent.info['piece length']
result.lastPieceLength = ((lastFile.offset + lastFile.length) % result.pieceLength) || result.pieceLength
result.pieces = splitPieces(torrent.info.pieces)

// Simplified pieces handling - fall back to v1 logic for both
if (torrent.info.pieces) {
result.pieces = splitPieces(torrent.info.pieces)
} else {
// For v2 torrents without pieces, create empty array
result.pieces = []
}

return result
}
Expand Down
114 changes: 114 additions & 0 deletions test/bittorrent-v2.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import fs from 'fs'
import parseTorrent from '../index.js'
import test from 'tape'

// Covers every way a v2 (SHA-256) info hash can be handed to parseTorrent:
// hex string, 32-byte buffer, btmh magnet uri, and an already-parsed object.
test('Test BitTorrent v2 hash support', async t => {
  let parsed

  // v2 info hash (as a hex string - 64 characters)
  const v2Hash = 'a'.repeat(64)
  parsed = await parseTorrent(v2Hash)
  t.equal(parsed.infoHashV2, v2Hash)
  t.equal(parsed.name, undefined)
  t.deepEqual(parsed.announce, [])

  // v2 info hash (as a Buffer - 32 bytes)
  const v2HashBuffer = Buffer.from(v2Hash, 'hex')
  parsed = await parseTorrent(v2HashBuffer)
  t.equal(parsed.infoHashV2, v2Hash)

  // magnet uri with v2 hash (btmh); this is the same uri parseTorrent builds
  // for a bare hex hash, so the exact hash is expected rather than any
  // truthy value
  const magnetV2 = `magnet:?xt=urn:btmh:1220${v2Hash}`
  parsed = await parseTorrent(magnetV2)
  t.equal(parsed.infoHashV2, v2Hash)

  // parsed torrent with both v1 and v2 hashes (hybrid); the v2 hash is given
  // in upper case so the lower-casing of infoHashV2 is actually exercised
  const torrentObjHybrid = {
    infoHash: 'd2474e86c95b19b8bcfdb92bc12c9d44667cfa36',
    infoHashV2: v2Hash.toUpperCase()
  }
  parsed = await parseTorrent(torrentObjHybrid)
  t.equal(parsed.infoHash, 'd2474e86c95b19b8bcfdb92bc12c9d44667cfa36')
  t.equal(parsed.infoHashV2, v2Hash)

  t.end()
})

// Parses the v2-only and hybrid fixture torrents under each hash mode and
// checks which info hashes end up on the result.
test('Parse BitTorrent v2 torrent files', async t => {
  const [v2Fixture, hybridFixture] = [
    './test/torrents/bittorrent-v2-test.torrent',
    './test/torrents/bittorrent-v2-hybrid-test.torrent'
  ].map(fixturePath => fs.readFileSync(fixturePath))

  // No options: only the v1 hash is produced
  const withDefaults = await parseTorrent(v2Fixture)
  t.ok(withDefaults.infoHash, 'v2 torrent should have v1 hash by default')
  t.notOk(withDefaults.infoHashV2, 'v2 torrent should not have v2 hash by default')

  // hashMode 'both': v1 and v2 hashes, with their expected hex lengths
  const withBoth = await parseTorrent(v2Fixture, { hashMode: 'both' })
  t.ok(withBoth.infoHash, 'Should have v1 hash')
  t.ok(withBoth.infoHashV2, 'Should have v2 hash')
  t.equal(withBoth.infoHash.length, 40, 'v1 hash should be 40 chars')
  t.equal(withBoth.infoHashV2.length, 64, 'v2 hash should be 64 chars')

  // hashMode 'v2': only the v2 hash
  const withV2Only = await parseTorrent(v2Fixture, { hashMode: 'v2' })
  t.notOk(withV2Only.infoHash, 'Should not have v1 hash')
  t.ok(withV2Only.infoHashV2, 'Should have v2 hash')

  // Hybrid fixture parsed with both hashes requested
  const hybridResult = await parseTorrent(hybridFixture, { hashMode: 'both' })
  t.ok(hybridResult.infoHash, 'Hybrid should have v1 hash')
  t.ok(hybridResult.infoHashV2, 'Hybrid should have v2 hash')

  // Every result exposes the standard parsed-torrent properties
  for (const result of [withDefaults, withBoth, withV2Only, hybridResult]) {
    t.ok(result.name, 'Should have name')
    t.ok(Array.isArray(result.files), 'Should have files array')
    t.ok(typeof result.length === 'number', 'Should have length')
  }

  t.end()
})

// Checks that each explicit hashMode value yields exactly the hashes it names.
test('Test hash mode options', async t => {
  const fixture = fs.readFileSync('./test/torrents/bittorrent-v2-test.torrent')

  // 'v1': v1 hash only
  const v1Result = await parseTorrent(fixture, { hashMode: 'v1' })
  t.ok(v1Result.infoHash, 'v1 mode should generate v1 hash')
  t.notOk(v1Result.infoHashV2, 'v1 mode should not generate v2 hash')

  // 'v2': v2 hash only
  const v2Result = await parseTorrent(fixture, { hashMode: 'v2' })
  t.notOk(v2Result.infoHash, 'v2 mode should not generate v1 hash')
  t.ok(v2Result.infoHashV2, 'v2 mode should generate v2 hash')

  // 'both': v1 and v2 hashes together
  const bothResult = await parseTorrent(fixture, { hashMode: 'both' })
  t.ok(bothResult.infoHash, 'both mode should generate v1 hash')
  t.ok(bothResult.infoHashV2, 'both mode should generate v2 hash')

  t.end()
})

// Identifiers that carry neither a v1 nor a v2 info hash must be rejected
// with an 'Invalid torrent identifier' error.
test('Test validation requires either v1 or v2 hash', async t => {
  const rejectedInputs = [
    // magnet uri whose xt is not a bittorrent hash
    ['magnet:?xt=urn:invalid:123', 'Should have thrown error for invalid magnet'],
    // plain object with neither infoHash nor infoHashV2
    [{ name: 'test' }, 'Should have thrown error for object without hashes']
  ]

  for (const [torrentId, failureMessage] of rejectedInputs) {
    try {
      await parseTorrent(torrentId)
      t.fail(failureMessage)
    } catch (err) {
      t.ok(err instanceof Error)
      t.ok(err.message.includes('Invalid torrent identifier'))
    }
  }

  t.end()
})
Binary file added test/torrents/bittorrent-v2-hybrid-test.torrent
Binary file not shown.
Binary file added test/torrents/bittorrent-v2-test.torrent
Binary file not shown.