Skip to content

Commit 1d4dbd8

Browse files
feat: collect license evidences
fixes #33 --------- Signed-off-by: Augustus Kling <[email protected]> Signed-off-by: Jan Kowalleck <[email protected]> Co-authored-by: Jan Kowalleck <[email protected]>
1 parent 4b4b314 commit 1d4dbd8

14 files changed

+185890
-19
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ $ yarn cyclonedx
7777
This causes information loss in exchange for shorter PURLs, which might make these strings easier to ingest.
7878
--output-reproducible Whether to go the extra mile and make the output reproducible.
7979
This might result in loss of time- and random-based values.
80+
--gather-license-texts Search for license files in components and include them as license evidence.
81+
This feature is experimental.
8082
--verbose,-v Increase the verbosity of messages.
8183
Use multiple times to increase the verbosity even more.
8284

src/_helpers.ts

+44
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Copyright (c) OWASP Foundation. All Rights Reserved.
1919

2020
import { xfs } from '@yarnpkg/fslib'
2121
import GitHost from 'hosted-git-info'
22+
import { extname, parse } from 'path'
2223

2324
export async function writeAllSync (fd: number, data: string): Promise<number> {
2425
const b = Buffer.from(data)
@@ -59,3 +60,46 @@ export function trySanitizeGitUrl (gitUrl: string): string {
5960
gitInfo.auth = undefined
6061
return gitInfo.toString()
6162
}
63+
64+
// region MIME
65+
66+
/** A media type (MIME type) string, for example `text/plain`. */
export type MimeType = string

const MIME_TEXT_PLAIN: MimeType = 'text/plain'

/**
 * Media types of known text files, keyed by lower-case file extension
 * (leading dot included). The empty key covers names without any extension.
 */
const MAP_TEXT_EXTENSION_MIME: Readonly<Record<string, MimeType>> = {
  '': MIME_TEXT_PLAIN,
  // https://www.iana.org/assignments/media-types/media-types.xhtml
  '.csv': 'text/csv',
  '.htm': 'text/html',
  '.html': 'text/html',
  '.md': 'text/markdown',
  '.txt': MIME_TEXT_PLAIN,
  '.rst': 'text/prs.fallenstein.rst',
  '.xml': 'text/xml', // not `application/xml` -- our scope is text!
  // add more mime types above this line. pull-requests welcome!
  // license-specific files
  '.license': MIME_TEXT_PLAIN,
  '.licence': MIME_TEXT_PLAIN
} as const

/**
 * Look up the media type of a text file by its extension.
 *
 * The extension is compared case-insensitively.
 *
 * @param filename - file name or path to inspect
 * @returns the mapped media type, or `undefined` when the extension is not in the table
 */
export function getMimeForTextFile (filename: string): MimeType | undefined {
  const extension = extname(filename).toLowerCase()
  return MAP_TEXT_EXTENSION_MIME[extension]
}
89+
90+
/** Lower-case base names (without extension) that mark a file as a license file. */
const LICENSE_FILENAME_BASE = new Set(['licence', 'license'])
/** Lower-case extensions that are accepted as plain text when combined with a license base name. */
const LICENSE_FILENAME_EXT = new Set(['.apache', '.bsd', '.gpl', '.mit'])

/**
 * Determine the media type of a license file from its name.
 *
 * Names such as `LICENSE.MIT` (license base name plus a known license-family
 * extension) are reported as plain text; every other name falls back to the
 * lookup by file extension. Matching is case-insensitive.
 *
 * @param filename - file name to inspect
 * @returns the media type, or `undefined` when none is known for the file
 */
export function getMimeForLicenseFile (filename: string): MimeType | undefined {
  const parsed = parse(filename.toLowerCase())
  if (LICENSE_FILENAME_BASE.has(parsed.name) && LICENSE_FILENAME_EXT.has(parsed.ext)) {
    return MIME_TEXT_PLAIN
  }
  return MAP_TEXT_EXTENSION_MIME[parsed.ext]
}
104+
105+
// endregion MIME

src/builders.ts

+81-16
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,29 @@ Copyright (c) OWASP Foundation. All Rights Reserved.
1919

2020
// import submodules to prevent loading unused, non-tree-shakable dependencies - like 'AJV'
2121
import type { FromNodePackageJson as PJB } from '@cyclonedx/cyclonedx-library/Builders'
22-
import { ComponentType, ExternalReferenceType, LicenseAcknowledgement } from '@cyclonedx/cyclonedx-library/Enums'
22+
import { AttachmentEncoding, ComponentType, ExternalReferenceType, LicenseAcknowledgement } from '@cyclonedx/cyclonedx-library/Enums'
2323
import type { FromNodePackageJson as PJF } from '@cyclonedx/cyclonedx-library/Factories'
24-
import { Bom, Component, ExternalReference, type License, Property, Tool } from '@cyclonedx/cyclonedx-library/Models'
24+
import { Attachment, Bom, Component, ComponentEvidence, ExternalReference, type License, NamedLicense, Property, Tool } from '@cyclonedx/cyclonedx-library/Models'
2525
import { BomUtility } from '@cyclonedx/cyclonedx-library/Utils'
2626
import { Cache, type FetchOptions, type Locator, type LocatorHash, type Package, type Project, structUtils, ThrowReport, type Workspace, YarnVersion } from '@yarnpkg/core'
2727
import { ppath } from '@yarnpkg/fslib'
2828
import { gitUtils as YarnPluginGitUtils } from '@yarnpkg/plugin-git'
2929
import normalizePackageData from 'normalize-package-data'
3030

3131
import { getBuildtimeInfo } from './_buildtimeInfo'
32-
import { isString, tryRemoveSecretsFromUrl, trySanitizeGitUrl } from './_helpers'
32+
import { getMimeForLicenseFile, isString, tryRemoveSecretsFromUrl, trySanitizeGitUrl } from './_helpers'
3333
import { wsAnchoredPackage } from './_yarnCompat'
3434
import { PropertyNames, PropertyValueBool } from './properties'
3535

3636
type ManifestFetcher = (pkg: Package) => Promise<any>
37+
type LicenseEvidenceFetcher = (pkg: Package) => AsyncGenerator<License>
3738

3839
interface BomBuilderOptions {
3940
omitDevDependencies?: BomBuilder['omitDevDependencies']
4041
metaComponentType?: BomBuilder['metaComponentType']
4142
reproducible?: BomBuilder['reproducible']
4243
shortPURLs?: BomBuilder['shortPURLs']
44+
gatherLicenseTexts?: BomBuilder['gatherLicenseTexts']
4345
}
4446

4547
export class BomBuilder {
@@ -51,6 +53,7 @@ export class BomBuilder {
5153
metaComponentType: ComponentType
5254
reproducible: boolean
5355
shortPURLs: boolean
56+
gatherLicenseTexts: boolean
5457

5558
console: Console
5659

@@ -69,13 +72,15 @@ export class BomBuilder {
6972
this.metaComponentType = options.metaComponentType ?? ComponentType.Application
7073
this.reproducible = options.reproducible ?? false
7174
this.shortPURLs = options.shortPURLs ?? false
75+
this.gatherLicenseTexts = options.gatherLicenseTexts ?? false
7276

7377
this.console = console_
7478
}
7579

7680
async buildFromWorkspace (workspace: Workspace): Promise<Bom> {
7781
// @TODO make switch to disable load from fs
7882
const fetchManifest: ManifestFetcher = await this.makeManifestFetcher(workspace.project)
83+
const fetchLicenseEvidences: LicenseEvidenceFetcher = await this.makeLicenseEvidenceFetcher(workspace.project)
7984

8085
const setLicensesDeclared = function (license: License): void {
8186
license.acknowledgement = LicenseAcknowledgement.Declared
@@ -118,7 +123,8 @@ export class BomBuilder {
118123
}
119124
for await (const component of this.gatherDependencies(
120125
rootComponent, rootPackage,
121-
workspace.project, fetchManifest
126+
workspace.project,
127+
fetchManifest, fetchLicenseEvidences
122128
)) {
123129
component.licenses.forEach(setLicensesDeclared)
124130

@@ -162,33 +168,90 @@ export class BomBuilder {
162168
}
163169
}
164170

171+
readonly #LICENSE_FILENAME_PATTERN = /^(?:UN)?LICEN[CS]E|.\.LICEN[CS]E$|^NOTICE$/i
172+
173+
/**
 * Create a function that collects license evidence from a package's files.
 *
 * The returned async generator fetches the package, lists the entries of the
 * package's root directory (no recursion), and yields one `NamedLicense` per
 * regular file whose name matches the license filename pattern and has a known
 * text media type. The file content is attached base64-encoded.
 *
 * @param project - the Yarn project whose configuration, cache and checksums are used for fetching
 * @returns a generator function yielding license evidence for a given package
 */
private async makeLicenseEvidenceFetcher (project: Project): Promise<LicenseEvidenceFetcher> {
  const fetcher = project.configuration.makeFetcher()
  const fetcherOptions: FetchOptions = {
    project,
    fetcher,
    cache: await Cache.find(project.configuration),
    checksums: project.storedChecksums,
    report: new ThrowReport(),
    cacheOptions: { skipIntegrityCheck: true }
  }
  // `this` is not available inside the generator function below, so capture the pattern here
  const LICENSE_FILENAME_PATTERN = this.#LICENSE_FILENAME_PATTERN
  return async function * (pkg: Package): AsyncGenerator<License> {
    const { packageFs, prefixPath, releaseFs } = await fetcher.fetch(pkg, fetcherOptions)
    try {
      // option `withFileTypes:true` is not supported and causes crashes
      const files = packageFs.readdirSync(prefixPath)
      for (const file of files) {
        if (!LICENSE_FILENAME_PATTERN.test(file)) {
          continue
        }

        const contentType = getMimeForLicenseFile(file)
        if (contentType === undefined) {
          continue
        }

        const fp = ppath.join(prefixPath, file)
        // The filename pattern also matches directories such as `licenses/`,
        // and names without extension map to plain text. Reading a directory
        // would throw and abort the whole run, so only regular files are read.
        if (!packageFs.statSync(fp).isFile()) {
          continue
        }

        yield new NamedLicense(
          `file: ${file}`,
          {
            text: new Attachment(
              packageFs.readFileSync(fp).toString('base64'),
              {
                contentType,
                encoding: AttachmentEncoding.Base64
              }
            )
          })
      }
    } finally {
      // release the fetched package's filesystem even when iteration stops early or fails
      if (releaseFs !== undefined) {
        releaseFs()
      }
    }
  }
}
219+
165220
private async makeComponentFromPackage (
166221
pkg: Package,
167222
fetchManifest: ManifestFetcher,
223+
fetchLicenseEvidence: LicenseEvidenceFetcher,
168224
type?: ComponentType | undefined
169225
): Promise<Component | false | undefined> {
170-
const data = await fetchManifest(pkg)
226+
const manifest = await fetchManifest(pkg)
171227
// the data in the manifest might be incomplete, so lets set the properties that yarn discovered and fixed
172228
/* eslint-disable-next-line @typescript-eslint/strict-boolean-expressions */
173-
data.name = pkg.scope ? `@${pkg.scope}/${pkg.name}` : pkg.name
174-
data.version = pkg.version
175-
return this.makeComponent(pkg, data, type)
229+
manifest.name = pkg.scope ? `@${pkg.scope}/${pkg.name}` : pkg.name
230+
manifest.version = pkg.version
231+
const component = this.makeComponent(pkg, manifest, type)
232+
if (this.gatherLicenseTexts && component instanceof Component) {
233+
component.evidence = new ComponentEvidence()
234+
for await (const le of fetchLicenseEvidence(pkg)) {
235+
component.evidence.licenses.add(le)
236+
}
237+
}
238+
return component
176239
}
177240

178-
private makeComponent (locator: Locator, data: any, type?: ComponentType | undefined): Component | false | undefined {
241+
private makeComponent (locator: Locator, manifest: any, type?: ComponentType | undefined): Component | false | undefined {
179242
// work with a deep copy, because `normalizePackageData()` might modify the data
180-
const dataC = structuredClonePolyfill(data)
181-
normalizePackageData(dataC as normalizePackageData.Input)
243+
const manifestC = structuredClonePolyfill(manifest)
244+
normalizePackageData(manifestC as normalizePackageData.Input)
182245
// region fix normalizations
183-
if (isString(data.version)) {
246+
if (isString(manifest.version)) {
184247
// allow non-SemVer strings
185-
dataC.version = data.version.trim()
248+
manifestC.version = manifest.version.trim()
186249
}
187250
// endregion fix normalizations
188251

189252
// work with a deep copy, because `normalizePackageData()` might modify the data
190253
const component = this.componentBuilder.makeComponent(
191-
dataC as normalizePackageData.Package, type)
254+
manifestC as normalizePackageData.Package, type)
192255
if (component === undefined) {
193256
this.console.debug('DEBUG | skip broken component: %j', locator)
194257
return undefined
@@ -296,7 +359,8 @@ export class BomBuilder {
296359
async * gatherDependencies (
297360
component: Component, pkg: Package,
298361
project: Project,
299-
fetchManifest: ManifestFetcher
362+
fetchManifest: ManifestFetcher,
363+
fetchLicenseEvidences: LicenseEvidenceFetcher
300364
): AsyncGenerator<Component> {
301365
// ATTENTION: multiple packages may have the same `identHash`, but the `locatorHash` is unique.
302366
const knownComponents = new Map<LocatorHash, Component>([[pkg.locatorHash, component]])
@@ -308,7 +372,8 @@ export class BomBuilder {
308372
let depComponent = knownComponents.get(depPkg.locatorHash)
309373
if (depComponent === undefined) {
310374
const _depIDN = structUtils.prettyLocatorNoColors(depPkg)
311-
const _depC = await this.makeComponentFromPackage(depPkg, fetchManifest)
375+
const _depC = await this.makeComponentFromPackage(depPkg,
376+
fetchManifest, fetchLicenseEvidences)
312377
if (_depC === false) {
313378
// shall be skipped
314379
this.console.debug('DEBUG | skip impossible component %j', _depIDN)

src/commands.ts

+8-1
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ export class MakeSbomCommand extends Command<CommandContext> {
112112
'This might result in loss of time- and random-based values.'
113113
})
114114

115+
gatherLicenseTexts = Option.Boolean('--gather-license-texts', false, {
116+
description: 'Search for license files in components and include them as license evidence.\n' +
117+
'This feature is experimental.'
118+
})
119+
115120
verbosity = Option.Counter('--verbose,-v', 1, {
116121
description: 'Increase the verbosity of messages.\n' +
117122
'Use multiple times to increase the verbosity even more.'
@@ -142,6 +147,7 @@ export class MakeSbomCommand extends Command<CommandContext> {
142147
mcType: this.mcType,
143148
shortPURLs: this.shortPURLs,
144149
outputReproducible: this.outputReproducible,
150+
gatherLicenseTexts: this.gatherLicenseTexts,
145151
verbosity: this.verbosity,
146152
projectDir
147153
})
@@ -171,7 +177,8 @@ export class MakeSbomCommand extends Command<CommandContext> {
171177
omitDevDependencies: this.production,
172178
metaComponentType: this.mcType,
173179
reproducible: this.outputReproducible,
174-
shortPURLs: this.shortPURLs
180+
shortPURLs: this.shortPURLs,
181+
gatherLicenseTexts: this.gatherLicenseTexts
175182
},
176183
myConsole
177184
)).buildFromWorkspace(workspace)

tests/README.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,20 @@ Test files must follow the pattern `**.{spec,test}.[cm]?js`, to be picked up.
1515
Test runner is `mocha`, configured in [mocharc file](../.mocharc.js).
1616

1717
```shell
18-
npm test
18+
yarn run test
1919
```
20+
21+
To run specific tests only:
22+
```shell
23+
yarn run test:node --grep "testname"
24+
```
25+
2026
### Snapshots
2127

2228
Some tests check against snapshots.
2329
To update these, set the env var `CYARN_TEST_UPDATE_SNAPSHOTS` to a non-falsy value.
2430

2531
like so:
2632
```shell
27-
CYARN_TEST_UPDATE_SNAPSHOTS=1 npm test
33+
CYARN_TEST_UPDATE_SNAPSHOTS=1 yarn run test
2834
```

0 commit comments

Comments
 (0)