@@ -8,78 +8,79 @@ import { Page } from "mongodb-rag-core";
8
8
9
9
jest . setTimeout ( 60000 ) ;
10
10
11
- export function samplePathToPage ( pathInRepo : string ) {
12
- if ( pathInRepo . endsWith ( "_index.md" ) ) {
13
- pathInRepo = pathInRepo . replace ( "_index.md" , "index.md" ) ;
14
- }
15
- return pathInRepo
16
- . replace ( / ^ d o c s \/ c o n t e n t \/ m o n g o c x x - v 3 / , "https://example/com" )
17
- . replace ( / \. m d $ / , "/" ) ;
18
- }
19
- const sampleConf : MakeMdOnGithubDataSourceParams = {
20
- name : "sample" ,
21
- repoUrl : "https://github.com/mongodb/mongo-cxx-driver/" ,
11
+ const baseChatbotRepoConfig : MakeMdOnGithubDataSourceParams = {
12
+ name : "chatbot" ,
13
+ repoUrl : "https://github.com/mongodb/chatbot" ,
22
14
repoLoaderOptions : {
23
- branch : "master" ,
24
- ignoreFiles : [ / ^ (? ! ^ \/ d o c s \/ c o n t e n t \/ m o n g o c x x - v 3 \/ ) .* / ] ,
25
- } ,
26
- pathToPageUrl : samplePathToPage ,
27
- metadata : {
28
- productName : "C++ Driver (mongocxx)" ,
15
+ branch : "main" ,
29
16
} ,
17
+ pathToPageUrl : ( path ) => path ,
18
+ extractMetadata : ( ) => ( {
19
+ foo : "bar" ,
20
+ } ) ,
21
+ } ;
22
+
23
+ const mongodbCorpConfig : MakeMdOnGithubDataSourceParams = {
24
+ ...baseChatbotRepoConfig ,
25
+ name : "mongodb-corp" ,
30
26
frontMatter : {
31
27
process : true ,
32
- separator : "+++" ,
33
- format : "toml" ,
28
+ separator : "---" ,
29
+ format : "yaml" ,
30
+ } ,
31
+ metadata : {
32
+ productName : "MongoDB Corp" ,
34
33
} ,
34
+ filter : ( path ) => path . includes ( "mongodb-corp" ) ,
35
35
extractTitle : ( _ , frontmatter ) => ( frontmatter ?. title as string ) ?? null ,
36
- extractMetadata : ( ) => ( {
37
- foo : "bar" ,
38
- } ) ,
39
36
} ;
37
+
38
+ const ingestTestDataConfig : MakeMdOnGithubDataSourceParams = {
39
+ ...baseChatbotRepoConfig ,
40
+ name : "ingest_testData" ,
41
+ metadata : {
42
+ productName : "Ingest Test Data" ,
43
+ } ,
44
+ filter : ( path ) => path . includes ( "ingest/testData" ) ,
45
+ } ;
46
+
40
47
describe ( "MdOnGithubDataSource" , ( ) => {
41
48
let pages : Page [ ] ;
49
+ const samplePages : Record < string , Page | undefined > = { } ;
50
+ const getSamplePage = ( path : string ) => {
51
+ const samplePage = samplePages [ path ] ;
52
+ assert ( samplePage ) ;
53
+ return samplePage ;
54
+ } ;
42
55
beforeAll ( async ( ) => {
43
- const dataSource = await makeMdOnGithubDataSource ( sampleConf ) ;
56
+ const dataSource = await makeMdOnGithubDataSource ( mongodbCorpConfig ) ;
44
57
pages = await dataSource . fetchPages ( ) ;
58
+ samplePages [ "mongodb-corp/chatbot/overview.md" ] = pages . find ( ( page ) => {
59
+ return page . url . includes ( "mongodb-corp/chatbot/overview.md" ) ;
60
+ } ) ;
45
61
} ) ;
46
62
it ( "loads and processes a real repo of markdown files" , async ( ) => {
47
- const samplePage = pages . find ( ( page ) =>
48
- page . title ?. includes ( "Installing the mongocxx driver" )
49
- ) ;
63
+ const samplePage = getSamplePage ( "mongodb-corp/chatbot/overview.md" ) ;
50
64
assert ( samplePage ) ;
51
- expect ( samplePage ?. body ) . toContain ( "install" ) ;
65
+ expect ( samplePage ?. body ) . toContain (
66
+ "The MongoDB AI is an advanced LLM-based chatbot"
67
+ ) ;
52
68
} ) ;
53
69
it ( "processes metadata" , ( ) => {
54
- const samplePage = pages [ 0 ] ;
70
+ const samplePage = getSamplePage ( "mongodb-corp/chatbot/overview.md" ) ;
55
71
expect ( samplePage . metadata ) . toHaveProperty ( "foo" , "bar" ) ;
56
- expect ( samplePage . metadata ) . toHaveProperty (
57
- "productName" ,
58
- "C++ Driver (mongocxx)"
59
- ) ;
72
+ expect ( samplePage . metadata ) . toHaveProperty ( "productName" , "MongoDB Corp" ) ;
60
73
} ) ;
61
74
it ( "removes frontmatter from page body" , ( ) => {
62
- const samplePage = pages [ 0 ] ;
63
- expect ( samplePage . body ) . not . toContain ( "+++ " ) ;
75
+ const samplePage = getSamplePage ( "mongodb-corp/chatbot/overview.md" ) ;
76
+ expect ( samplePage . body ) . not . toContain ( "--- " ) ;
64
77
} ) ;
65
78
it ( "extracts title from frontmatter" , ( ) => {
66
- const samplePage = pages [ 0 ] ;
79
+ const samplePage = getSamplePage ( "mongodb-corp/chatbot/overview.md" ) ;
67
80
expect ( samplePage . title ) . toBeTruthy ( ) ;
68
81
} ) ;
69
82
it ( "works with .mdx files" , async ( ) => {
70
- const sampleConf : MakeMdOnGithubDataSourceParams = {
71
- name : "sample" ,
72
- repoUrl : "https://github.com/mongodb/chatbot" ,
73
- repoLoaderOptions : {
74
- branch : "main" ,
75
- } ,
76
- pathToPageUrl : ( path ) => path ,
77
- metadata : {
78
- productName : "C++ Driver (mongocxx)" ,
79
- } ,
80
- filter : ( path ) => path . includes ( "ingest/testData" ) ,
81
- } ;
82
- const dataSource = await makeMdOnGithubDataSource ( sampleConf ) ;
83
+ const dataSource = await makeMdOnGithubDataSource ( ingestTestDataConfig ) ;
83
84
const pages = await dataSource . fetchPages ( ) ;
84
85
expect ( pages . length ) . toBeGreaterThan ( 1 ) ;
85
86
expect (
0 commit comments