|
1 | 1 | [
|
| 2 | + { |
| 3 | + "category": ["DigitalAIAgents"], |
| 4 | + "title": "OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments", |
| 5 | + "authors": "Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, Zhoujun Cheng, Dongchan Shin, Fangyu Lei, Yitao Liu, Yiheng Xu, Shuyan Zhou, Silvio Savarese, Caiming Xiong, Victor Zhong, Tao Yu", |
| 6 | + "publication": "NeurIPS 2024", |
| 7 | + "paperLink": "https://arxiv.org/abs/2404.07972", |
| 8 | + "codeLink": "https://github.com/xlang-ai/OSWorld", |
| 9 | + "dataLink": "https://github.com/xlang-ai/OSWorld/tree/main/evaluation_examples", |
| 10 | + "blogLink": "https://os-world.github.io/", |
| 11 | + "twitterLink": "https://twitter.com/TianbaoX/status/1778781521253667267", |
| 12 | + "image": "/research/osworld.png" |
| 13 | + }, |
2 | 14 | {
|
3 | 15 | "category": ["DigitalAIAgents", "Others"],
|
4 | 16 | "title": "AgentTrek: Agent Trajectory Synthesis via Guiding Replay with Web Tutorials",
|
|
83 | 95 | "twitterLink": "https://twitter.com/Muennighoff/status/1758307967802224770",
|
84 | 96 | "image": "/research/grit.jpeg"
|
85 | 97 | },
|
86 |
| - { |
87 |
| - "category": ["DigitalAIAgents"], |
88 |
| - "title": "OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments", |
89 |
| - "authors": "Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, Zhoujun Cheng, Dongchan Shin, Fangyu Lei, Yitao Liu, Yiheng Xu, Shuyan Zhou, Silvio Savarese, Caiming Xiong, Victor Zhong, Tao Yu", |
90 |
| - "publication": "NeurIPS 2024", |
91 |
| - "paperLink": "https://arxiv.org/abs/2404.07972", |
92 |
| - "codeLink": "https://github.com/xlang-ai/OSWorld", |
93 |
| - "dataLink": "https://github.com/xlang-ai/OSWorld/tree/main/evaluation_examples", |
94 |
| - "blogLink": "https://os-world.github.io/", |
95 |
| - "twitterLink": "https://twitter.com/TianbaoX/status/1778781521253667267", |
96 |
| - "image": "/research/osworld.png" |
97 |
| - }, |
98 | 98 | {
|
99 | 99 | "category": ["CodeGeneration", "DigitalAIAgents"],
|
100 | 100 | "title": "Spider2-V: How Far Are Multimodal Agents From Automating Data Science and Engineering Workflows?",
|
|
0 commit comments