diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d163863
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build/
\ No newline at end of file
diff --git a/README.md b/README.md
index 20ee451..4fef6dc 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,65 @@ Vulkan Grass Rendering
 
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 5**
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* XiaoyuDu
+* Tested on: Windows 10, i9-11900KF @ 3.50GHz, RTX 3080 (Personal PC)
 
-### (TODO: Your README)
+### Description  
+This project reproduces the grass simulation method proposed in the paper "Responsive Real-Time Grass Rendering for General 3D Scenes" by Klemens Jahrmann and Michael Wimmer. The whole project is driven by Vulkan and OpenGL. All the calculations proposed in the paper are performed in shaders, making the method applicable to many kinds of real-time scenarios. The paper uses the following model to describe a blade of grass.
+<p align="center">
+<img src="images/description/des1.png" width=40% height=40% align="center">
+</p>   
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+`v0`, `v1` and `v2` are the three control points that form a Bézier curve, which represents the blade in the tessellation shaders.  
+`height` represents the length of the blade.  
+`direction` represents the orientation of the blade.  
+`width` represents the length of the cross-section width of the blade.  
+`up-vector` represents the normal of the ground.  
+<p align="center">
+<img src="images/description/des2.png" width=40% height=40% align="center">
+</p>   
+
+Besides the grass model, the paper also models several forces that act on each blade, as shown in the figure above.  
+`gravity` includes environmental gravity and front gravity, which simulates the elasticity of a blade.  
+`recovery` is a force that counteracts any force that tries to bend the blade.  
+`wind` is a force that changes from moment to moment, which can make the whole grass simulation look more realistic.  
+  
+In this project, I implemented the basic graphics and compute pipelines to render the grass and compute the forces that affect each blade. All three forces mentioned above are taken into account, and three culling methods are also implemented to further improve performance.    
+
+### Feature  
+* Basic pipeline  
+I first built the graphics pipeline using Vulkan and tried to render the grass. Based on the paper, I used a tessellation shader with inner and outer tessellation levels of 16 to evaluate the Bézier curve and generate a smooth shape for each blade. The position and direction of each blade are generated randomly. The total number of blades is set to 2^10. The result looks like the picture shown below.  
+<p align="center">
+<img src="images/part1/procedure1.png" width=60% height=60% align="center">
+</p>  
+  
+* Forces  
+After I finished the graphics pipeline, I built the compute pipeline and added the effects of the three forces in the compute shader. The effect of each force is implemented based on its model described in the paper. The gravity force includes an environmental universal gravity and a front gravity for each blade, and they always act on the v2 point of each blade. Each blade always has a recovery force that tries to make it stand straight up again. The wind force is simulated using sin and cos functions of the uniform time variable, so that the direction of the wind is always changing. Now, the simulation result looks like the animation shown below. 
+<p align="center">
+<img src="images/part1/procedure2.gif" width=60% height=60% align="center">
+</p>  
+  
+  
+* Culling Method
+Then, three kinds of culling methods are implemented to improve performance. The first method is called orientation culling, which removes blades whose direction is similar to the view direction of the camera, because for those blades we can only see their sides and thus we may not need to render them. The second method is called view-frustum culling, which removes all the blades outside the current view space. The third method is called distance culling, which removes the blades that are far away from the camera. The results of the three culling methods are shown below.   
+In the first gif, you can see that each blade whose direction is similar to the camera's view direction disappears. In the second gif, you can see that blades at the corners of the camera's view space tend to disappear. In the third gif, you can see that blades disappear once they are far away.  
+
+| Orientation Culling | View-Frustum Culling | Distance Culling  |
+| --- | --- | --- |
+| ![](images/part1/orientation.gif) | ![](images/part1/viewfrustum.gif) | ![](images/part1/distance.gif) |
+  
+A result that integrates all of the above methods is shown below.  
+<p align="center">
+<img src="images/part1/procedure3.gif" width=70% height=70% align="center">
+</p>   
+  
+### Performance Analysis  
+In the chart below, I compare the FPS for different numbers of blades. The result makes sense: the more blades there are, the lower the FPS. You can see that even with 2^17 blades, the FPS can still be as high as about 52.  
+<p align="center">
+<img src="images/part2/Picture1.png" width=70% height=70% align="center">
+</p>    
+   
+The chart below compares the effects of the different culling methods. The interesting thing to notice is that in some cases, view-frustum culling and distance culling actually make performance worse. Later on I found out that the problem is not in my implementation but in the scene itself. View-frustum culling and distance culling are expensive, especially frustum culling, where we have to take two points of the blade plus one interpolated midpoint and do a bounds check on all of them. Thus, if not many blades are culled, the cost of doing the checks becomes higher than the performance saved by skipping the blades that we don't need to render. For this chart, I ran all the tests using the default camera position, which is in the middle of the grass field, so not many blades were culled. However, when I moved the camera to the middle and really close to the ground and tested view-frustum culling, it outperformed every other method. When I moved the camera really far away so that only a few blades were left, distance culling outperformed every other method. So I think these two methods are highly scene-dependent.  
+<p align="center">
+<img src="images/part2/Picture2.png" width=80% height=80% align="center">
+</p>   
diff --git a/bin/Debug/vulkan_grass_rendering.exe b/bin/Debug/vulkan_grass_rendering.exe
new file mode 100644
index 0000000..0aebbe5
Binary files /dev/null and b/bin/Debug/vulkan_grass_rendering.exe differ
diff --git a/bin/Debug/vulkan_grass_rendering.pdb b/bin/Debug/vulkan_grass_rendering.pdb
new file mode 100644
index 0000000..be452d2
Binary files /dev/null and b/bin/Debug/vulkan_grass_rendering.pdb differ
diff --git a/bin/Release/vulkan_grass_rendering.exe b/bin/Release/vulkan_grass_rendering.exe
index f68db3a..1c3c8c4 100644
Binary files a/bin/Release/vulkan_grass_rendering.exe and b/bin/Release/vulkan_grass_rendering.exe differ
diff --git a/images/description/des1.png b/images/description/des1.png
new file mode 100644
index 0000000..1d62be0
Binary files /dev/null and b/images/description/des1.png differ
diff --git a/images/description/des2.png b/images/description/des2.png
new file mode 100644
index 0000000..65f4089
Binary files /dev/null and b/images/description/des2.png differ
diff --git a/images/part1/distance.gif b/images/part1/distance.gif
new file mode 100644
index 0000000..25131f6
Binary files /dev/null and b/images/part1/distance.gif differ
diff --git a/images/part1/orientation.gif b/images/part1/orientation.gif
new file mode 100644
index 0000000..739ac69
Binary files /dev/null and b/images/part1/orientation.gif differ
diff --git a/images/part1/procedure1.png b/images/part1/procedure1.png
new file mode 100644
index 0000000..98722c4
Binary files /dev/null and b/images/part1/procedure1.png differ
diff --git a/images/part1/procedure2.gif b/images/part1/procedure2.gif
new file mode 100644
index 0000000..16dcd11
Binary files /dev/null and b/images/part1/procedure2.gif differ
diff --git a/images/part1/procedure3.gif b/images/part1/procedure3.gif
new file mode 100644
index 0000000..17e8e42
Binary files /dev/null and b/images/part1/procedure3.gif differ
diff --git a/images/part1/viewfrustum.gif b/images/part1/viewfrustum.gif
new file mode 100644
index 0000000..6316d87
Binary files /dev/null and b/images/part1/viewfrustum.gif differ
diff --git a/images/part2/Picture1.png b/images/part2/Picture1.png
new file mode 100644
index 0000000..be8e585
Binary files /dev/null and b/images/part2/Picture1.png differ
diff --git a/images/part2/Picture2.png b/images/part2/Picture2.png
new file mode 100644
index 0000000..08bef78
Binary files /dev/null and b/images/part2/Picture2.png differ
diff --git a/src/Blades.h b/src/Blades.h
index 9bd1eed..09ab045 100644
--- a/src/Blades.h
+++ b/src/Blades.h
@@ -4,6 +4,7 @@
 #include <array>
 #include "Model.h"
 
+// 1 << 13
 constexpr static unsigned int NUM_BLADES = 1 << 13;
 constexpr static float MIN_HEIGHT = 1.3f;
 constexpr static float MAX_HEIGHT = 2.5f;
diff --git a/src/Instance.cpp b/src/Instance.cpp
index 7f6b01c..e32ece2 100644
--- a/src/Instance.cpp
+++ b/src/Instance.cpp
@@ -1,8 +1,9 @@
-#include <stdexcept>
+﻿#include <stdexcept>
 #include <set>
 #include <vector>
 #include "Instance.h"
 
+// NDEBUG is the standard C/C++ macro defined for release builds; validation is disabled when it is defined
 #ifdef NDEBUG
 const bool ENABLE_VALIDATION = false;
 #else
@@ -258,6 +259,7 @@ void Instance::PickPhysicalDevice(std::vector<const char*> deviceExtensions, Que
             }
         }
 
+        // If a present queue family is required, we will be displaying images, so check the surface properties the device supports
         if (requiredQueues[QueueFlags::Present]) {
             // Get basic surface capabilities
             vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device, surface, &surfaceCapabilities);
@@ -350,6 +352,9 @@ Device* Instance::CreateDevice(QueueFlagBits requiredQueues, VkPhysicalDeviceFea
         throw std::runtime_error("Failed to create logical device");
     }
 
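+    // Queues are created automatically together with the logical device; we need to retrieve the handles that refer to them
+    // queues holds the handles of all the required queues
+    // If the queue family indices are all the same, every entry in queues ends up being the same queue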
     Device::Queues queues;
     for (unsigned int i = 0; i < requiredQueues.size(); ++i) {
         if (requiredQueues[i]) {
diff --git a/src/Renderer.cpp b/src/Renderer.cpp
index b445d04..a0216ea 100644
--- a/src/Renderer.cpp
+++ b/src/Renderer.cpp
@@ -1,4 +1,4 @@
-#include "Renderer.h"
+﻿#include "Renderer.h"
 #include "Instance.h"
 #include "ShaderModule.h"
 #include "Vertex.h"
@@ -15,8 +15,8 @@ Renderer::Renderer(Device* device, SwapChain* swapChain, Scene* scene, Camera* c
     scene(scene),
     camera(camera) {
 
-    CreateCommandPools();
-    CreateRenderPass();
+    CreateCommandPools(); //commandPool管理用于commandBuffers的内存
+    CreateRenderPass();  //render pass决定渲染时如何使用frame buffer
     CreateCameraDescriptorSetLayout();
     CreateModelDescriptorSetLayout();
     CreateTimeDescriptorSetLayout();
@@ -27,8 +27,8 @@ Renderer::Renderer(Device* device, SwapChain* swapChain, Scene* scene, Camera* c
     CreateGrassDescriptorSets();
     CreateTimeDescriptorSet();
     CreateComputeDescriptorSets();
-    CreateFrameResources();
-    CreateGraphicsPipeline();
+    CreateFrameResources();  //创建imageView和frameBuffer
+    CreateGraphicsPipeline();  //包括所有的可编程stages（shaderModule）、fixed-function stages、renderPass等
     CreateGrassPipeline();
     CreateComputePipeline();
     RecordCommandBuffers();
@@ -36,9 +36,13 @@ Renderer::Renderer(Device* device, SwapChain* swapChain, Scene* scene, Camera* c
 }
 
 void Renderer::CreateCommandPools() {
+    //Each command pool can only allocate command buffers that are submitted on a single type of queue
+    // Command buffers can only be allocated once a command pool exists.
     VkCommandPoolCreateInfo graphicsPoolInfo = {};
     graphicsPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
     graphicsPoolInfo.queueFamilyIndex = device->GetInstance()->GetQueueFamilyIndices()[QueueFlags::Graphics];
+    // VK_COMMAND_POOL_CREATE_TRANSIENT_BIT (flag value 1): hint that command buffers are re-recorded with new commands very often.
+    // VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT (flag value 2): allow command buffers to be re-recorded individually; without this flag they all have to be reset together.
     graphicsPoolInfo.flags = 0;
 
     if (vkCreateCommandPool(logicalDevice, &graphicsPoolInfo, nullptr, &graphicsCommandPool) != VK_SUCCESS) {
@@ -56,19 +60,35 @@ void Renderer::CreateCommandPools() {
 }
 
 void Renderer::CreateRenderPass() {
+    // Information about the framebuffer attachments that will be used while rendering
     // Color buffer attachment represented by one of the images from the swap chain
     VkAttachmentDescription colorAttachment = {};
     colorAttachment.format = swapChain->GetVkImageFormat();
+    // Multisampling (sample count)
     colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
+    // How to treat the color and depth data in the attachment before rendering
+    // VK_ATTACHMENT_LOAD_OP_LOAD: preserve the existing contents
+    // VK_ATTACHMENT_LOAD_OP_CLEAR: clear to a constant value
+    // VK_ATTACHMENT_LOAD_OP_DONT_CARE
     colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+    // How to treat the color and depth data in the attachment after rendering
+    // VK_ATTACHMENT_STORE_OP_STORE: rendered contents are stored in memory and can be read later
+    // VK_ATTACHMENT_STORE_OP_DONT_CARE
     colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+    // Same as above, but for the stencil data
     colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
     colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
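+    // Textures and framebuffers in Vulkan are represented by VkImage objects with a specific pixel format, but the layout of the pixels in memory changes depending on what you are doing with the image.
+    // initialLayout specifies the layout the image has before rendering begins
+    // finalLayout specifies the layout to transition to automatically when the render pass finishes
+    // VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: image to be presented in the swap chain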
     colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
     colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
 
     // Create a color attachment reference to be used with subpass
+    // (it is used by the subpass)
     VkAttachmentReference colorAttachmentRef = {};
+    // References an attachment in the attachments array by its index
     colorAttachmentRef.attachment = 0;
     colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 
@@ -198,6 +218,40 @@ void Renderer::CreateComputeDescriptorSetLayout() {
     // TODO: Create the descriptor set layout for the compute pipeline
     // Remember this is like a class definition stating why types of information
     // will be stored at each binding
+    
+    //Blade has three buffers, thus we create three bindings for them.
+    VkDescriptorSetLayoutBinding bladeLayoutBinding = {};
+    bladeLayoutBinding.binding = 0;
+    bladeLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    bladeLayoutBinding.descriptorCount = 1;
+    bladeLayoutBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;  //which pipeline shader stages can access a resource for this binding
+    bladeLayoutBinding.pImmutableSamplers = nullptr;
+
+    VkDescriptorSetLayoutBinding cullLayoutBinding = {};
+    cullLayoutBinding.binding = 1;
+    cullLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    cullLayoutBinding.descriptorCount = 1;
+    cullLayoutBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    cullLayoutBinding.pImmutableSamplers = nullptr;
+
+    VkDescriptorSetLayoutBinding numLayoutBinding = {};
+    numLayoutBinding.binding = 2;
+    numLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    numLayoutBinding.descriptorCount = 1;
+    numLayoutBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    numLayoutBinding.pImmutableSamplers = nullptr;
+
+    std::vector<VkDescriptorSetLayoutBinding> bindings = { bladeLayoutBinding, cullLayoutBinding, numLayoutBinding };
+
+    VkDescriptorSetLayoutCreateInfo layoutInfo = {};
+    layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+    layoutInfo.bindingCount = static_cast<uint32_t>(bindings.size());
+    layoutInfo.pBindings = bindings.data();
+
+    if (vkCreateDescriptorSetLayout(logicalDevice, &layoutInfo, nullptr, &computeDescriptorSetLayout) != VK_SUCCESS) {
+        throw std::runtime_error("Failed to create compute descriptor set layout");
+    }
+
 }
 
 void Renderer::CreateDescriptorPool() {
@@ -216,6 +270,8 @@ void Renderer::CreateDescriptorPool() {
         { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 1 },
 
         // TODO: Add any additional types and counts of descriptors you will need to allocate
+        // Compute
+        { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER , static_cast<uint32_t>(scene->GetBlades().size()) }
     };
 
     VkDescriptorPoolCreateInfo poolInfo = {};
@@ -320,6 +376,42 @@ void Renderer::CreateModelDescriptorSets() {
 void Renderer::CreateGrassDescriptorSets() {
     // TODO: Create Descriptor sets for the grass.
     // This should involve creating descriptor sets which point to the model matrix of each group of grass blades
+    grassDescriptorSets.resize(scene->GetBlades().size());
+
+    // vkAllocateDescriptorSets reads descriptorSetCount layout handles, so supply one layout per set
+    std::vector<VkDescriptorSetLayout> layouts(grassDescriptorSets.size(), modelDescriptorSetLayout);
+    VkDescriptorSetAllocateInfo allocInfo = {};
+    allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+    allocInfo.descriptorPool = descriptorPool;
+    allocInfo.descriptorSetCount = static_cast<uint32_t>(grassDescriptorSets.size());
+    allocInfo.pSetLayouts = layouts.data();
+
+    // Allocate descriptor sets
+    if (vkAllocateDescriptorSets(logicalDevice, &allocInfo, grassDescriptorSets.data()) != VK_SUCCESS) {
+        throw std::runtime_error("Failed to allocate grass descriptor set");
+    }
+
+    // Buffer infos are stored outside the loop so each pBufferInfo is still valid when vkUpdateDescriptorSets reads it
+    std::vector<VkDescriptorBufferInfo> grassBufferInfos(grassDescriptorSets.size());
+    std::vector<VkWriteDescriptorSet> descriptorWrites(grassDescriptorSets.size());
+    for (uint32_t i = 0; i < scene->GetBlades().size(); ++i) {
+        VkDescriptorBufferInfo& grassBufferInfo = grassBufferInfos[i];
+        grassBufferInfo.buffer = scene->GetBlades()[i]->GetModelBuffer();
+        grassBufferInfo.offset = 0;
+        grassBufferInfo.range = sizeof(ModelBufferObject);
+        descriptorWrites[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        descriptorWrites[i].dstSet = grassDescriptorSets[i];
+        descriptorWrites[i].dstBinding = 0;
+        descriptorWrites[i].dstArrayElement = 0;
+        descriptorWrites[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+        descriptorWrites[i].descriptorCount = 1;
+        descriptorWrites[i].pBufferInfo = &grassBufferInfo;
+        descriptorWrites[i].pImageInfo = nullptr;
+        descriptorWrites[i].pTexelBufferView = nullptr;
+    }
+
+    // Update descriptor sets
+    vkUpdateDescriptorSets(logicalDevice, static_cast<uint32_t>(descriptorWrites.size()), descriptorWrites.data(), 0, nullptr);
 }
 
 void Renderer::CreateTimeDescriptorSet() {
@@ -360,9 +452,79 @@ void Renderer::CreateTimeDescriptorSet() {
 void Renderer::CreateComputeDescriptorSets() {
     // TODO: Create Descriptor sets for the compute pipeline
     // The descriptors should point to Storage buffers which will hold the grass blades, the culled grass blades, and the output number of grass blades 
+
+    computeDescriptorSets.resize(scene->GetBlades().size());
+
+    // vkAllocateDescriptorSets reads descriptorSetCount layout handles, so supply one layout per set
+    std::vector<VkDescriptorSetLayout> layouts(computeDescriptorSets.size(), computeDescriptorSetLayout);
+    VkDescriptorSetAllocateInfo allocInfo = {};
+    allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+    allocInfo.descriptorPool = descriptorPool;
+    allocInfo.descriptorSetCount = static_cast<uint32_t>(computeDescriptorSets.size());
+    allocInfo.pSetLayouts = layouts.data();
+
+    // Allocate descriptor sets
+    if (vkAllocateDescriptorSets(logicalDevice, &allocInfo, computeDescriptorSets.data()) != VK_SUCCESS) {
+        throw std::runtime_error("Failed to allocate compute descriptor set");
+    }
+
+    // Three buffers per blade group; the infos outlive the loop so every pBufferInfo is valid in vkUpdateDescriptorSets
+    std::vector<VkDescriptorBufferInfo> bufferInfos(3 * computeDescriptorSets.size());
+    std::vector<VkWriteDescriptorSet> descriptorWrites(3 * computeDescriptorSets.size());
+    for (uint32_t i = 0; i < scene->GetBlades().size(); ++i) {
+        VkDescriptorBufferInfo& bladeBufferInfo = bufferInfos[3 * i];
+        bladeBufferInfo.buffer = scene->GetBlades()[i]->GetBladesBuffer();
+        bladeBufferInfo.offset = 0;
+        bladeBufferInfo.range = NUM_BLADES * sizeof(Blade);
+
+        descriptorWrites[3 * i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        descriptorWrites[3 * i].dstSet = computeDescriptorSets[i];
+        descriptorWrites[3 * i].dstBinding = 0;
+        descriptorWrites[3 * i].dstArrayElement = 0;
+        descriptorWrites[3 * i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+        descriptorWrites[3 * i].descriptorCount = 1;
+        descriptorWrites[3 * i].pBufferInfo = &bladeBufferInfo;
+        descriptorWrites[3 * i].pImageInfo = nullptr;
+        descriptorWrites[3 * i].pTexelBufferView = nullptr;
+
+        VkDescriptorBufferInfo& cullBufferInfo = bufferInfos[3 * i + 1];
+        cullBufferInfo.buffer = scene->GetBlades()[i]->GetCulledBladesBuffer();
+        cullBufferInfo.offset = 0;
+        cullBufferInfo.range = NUM_BLADES * sizeof(Blade);
+
+        descriptorWrites[3 * i + 1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        descriptorWrites[3 * i + 1].dstSet = computeDescriptorSets[i];
+        descriptorWrites[3 * i + 1].dstBinding = 1;
+        descriptorWrites[3 * i + 1].dstArrayElement = 0;
+        descriptorWrites[3 * i + 1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+        descriptorWrites[3 * i + 1].descriptorCount = 1;
+        descriptorWrites[3 * i + 1].pBufferInfo = &cullBufferInfo;
+        descriptorWrites[3 * i + 1].pImageInfo = nullptr;
+        descriptorWrites[3 * i + 1].pTexelBufferView = nullptr;
+
+        VkDescriptorBufferInfo& numBufferInfo = bufferInfos[3 * i + 2];
+        numBufferInfo.buffer = scene->GetBlades()[i]->GetNumBladesBuffer();
+        numBufferInfo.offset = 0;
+        numBufferInfo.range = sizeof(BladeDrawIndirect);
+
+        descriptorWrites[3 * i + 2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        descriptorWrites[3 * i + 2].dstSet = computeDescriptorSets[i];
+        descriptorWrites[3 * i + 2].dstBinding = 2;
+        descriptorWrites[3 * i + 2].dstArrayElement = 0;
+        descriptorWrites[3 * i + 2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+        descriptorWrites[3 * i + 2].descriptorCount = 1;
+        descriptorWrites[3 * i + 2].pBufferInfo = &numBufferInfo;
+        descriptorWrites[3 * i + 2].pImageInfo = nullptr;
+        descriptorWrites[3 * i + 2].pTexelBufferView = nullptr;
+    }
+
+    // Update descriptor sets
+    vkUpdateDescriptorSets(logicalDevice, static_cast<uint32_t>(descriptorWrites.size()), descriptorWrites.data(), 0, nullptr);
+
 }
 
 void Renderer::CreateGraphicsPipeline() {
+    // Create the shader modules; note that these files are already compiled to SPIR-V
     VkShaderModule vertShaderModule = ShaderModule::Create("shaders/graphics.vert.spv", logicalDevice);
     VkShaderModule fragShaderModule = ShaderModule::Create("shaders/graphics.frag.spv", logicalDevice);
 
@@ -379,6 +541,7 @@ void Renderer::CreateGraphicsPipeline() {
     fragShaderStageInfo.module = fragShaderModule;
     fragShaderStageInfo.pName = "main";
 
+    // Array of the programmable stages
     VkPipelineShaderStageCreateInfo shaderStages[] = { vertShaderStageInfo, fragShaderStageInfo };
 
     // --- Set up fixed-function stages ---
@@ -424,12 +587,19 @@ void Renderer::CreateGraphicsPipeline() {
     // Rasterizer
     VkPipelineRasterizationStateCreateInfo rasterizer = {};
     rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
+    // If VK_TRUE, fragments beyond the near and far planes are clamped to them instead of being discarded
     rasterizer.depthClampEnable = VK_FALSE;
+    // If VK_TRUE, geometry never passes through the rasterizer stage, which basically disables any output to the framebuffer
     rasterizer.rasterizerDiscardEnable = VK_FALSE;
+    // Determines how fragments are generated for geometry; the available modes are FILL, LINE and POINT
     rasterizer.polygonMode = VK_POLYGON_MODE_FILL;
+    // Line thickness in number of fragments; any line thicker than 1.0f requires enabling the wideLines GPU feature
     rasterizer.lineWidth = 1.0f;
-    rasterizer.cullMode = VK_CULL_MODE_BACK_BIT;
+    // Type of face culling to use: VK_CULL_MODE_NONE, VK_CULL_MODE_FRONT_BIT, VK_CULL_MODE_BACK_BIT or VK_CULL_MODE_FRONT_AND_BACK
+    rasterizer.cullMode = VK_CULL_MODE_NONE;
+    // Vertex order for faces to be considered front-facing; can be clockwise or counter-clockwise
     rasterizer.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
+    // The rasterizer can alter depth values by adding a constant value or biasing them based on the fragment's slope. This is sometimes used for shadow mapping
     rasterizer.depthBiasEnable = VK_FALSE;
     rasterizer.depthBiasConstantFactor = 0.0f;
     rasterizer.depthBiasClamp = 0.0f;
@@ -454,7 +624,7 @@ void Renderer::CreateGraphicsPipeline() {
     depthStencil.depthBoundsTestEnable = VK_FALSE;
     depthStencil.minDepthBounds = 0.0f;
     depthStencil.maxDepthBounds = 1.0f;
-    depthStencil.stencilTestEnable = VK_FALSE;
+    depthStencil.stencilTestEnable = VK_TRUE;
 
     // Color blending (turned off here, but showing options for learning)
     // --> Configuration per attached framebuffer
@@ -509,7 +679,8 @@ void Renderer::CreateGraphicsPipeline() {
     pipelineInfo.pDynamicState = nullptr;
     pipelineInfo.layout = graphicsPipelineLayout;
     pipelineInfo.renderPass = renderPass;
-    pipelineInfo.subpass = 0;
+    pipelineInfo.subpass = 0; //subpass的index
+    // Used when deriving this pipeline from an existing one
     pipelineInfo.basePipelineHandle = VK_NULL_HANDLE;
     pipelineInfo.basePipelineIndex = -1;
 
@@ -717,7 +888,7 @@ void Renderer::CreateComputePipeline() {
     computeShaderStageInfo.pName = "main";
 
     // TODO: Add the compute dsecriptor set layout you create to this list
-    std::vector<VkDescriptorSetLayout> descriptorSetLayouts = { cameraDescriptorSetLayout, timeDescriptorSetLayout };
+    std::vector<VkDescriptorSetLayout> descriptorSetLayouts = { cameraDescriptorSetLayout, timeDescriptorSetLayout, computeDescriptorSetLayout };
 
     // Create pipeline layout
     VkPipelineLayoutCreateInfo pipelineLayoutInfo = {};
@@ -759,6 +930,7 @@ void Renderer::CreateFrameResources() {
         createInfo.image = swapChain->GetVkImage(i);
 
         // Specify how the image data should be interpreted
+        // The viewType parameter allows the image to be treated as a 1D texture, 2D texture, 3D texture or cube map
         createInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
         createInfo.format = swapChain->GetVkImageFormat();
 
@@ -783,6 +955,7 @@ void Renderer::CreateFrameResources() {
 
     VkFormat depthFormat = device->GetInstance()->GetSupportedFormat({ VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT }, VK_IMAGE_TILING_OPTIMAL, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT);
     // CREATE DEPTH IMAGE
+    // Only a single depth image is needed, because only one draw operation runs at a time
     Image::Create(device,
         swapChain->GetVkExtent().width,
         swapChain->GetVkExtent().height,
@@ -799,7 +972,6 @@ void Renderer::CreateFrameResources() {
     // Transition the image for use as depth-stencil
     Image::TransitionLayout(device, graphicsCommandPool, depthImage, depthFormat, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
 
-    
     // CREATE FRAMEBUFFERS
     framebuffers.resize(swapChain->GetCount());
     for (size_t i = 0; i < swapChain->GetCount(); i++) {
@@ -810,11 +982,14 @@ void Renderer::CreateFrameResources() {
 
         VkFramebufferCreateInfo framebufferInfo = {};
         framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
+        // The render pass this framebuffer needs to be compatible with
         framebufferInfo.renderPass = renderPass;
+        // The VkImageView objects used as attachments
         framebufferInfo.attachmentCount = static_cast<uint32_t>(attachments.size());
         framebufferInfo.pAttachments = attachments.data();
         framebufferInfo.width = swapChain->GetVkExtent().width;
         framebufferInfo.height = swapChain->GetVkExtent().height;
+        // layers is the number of layers in the image arrays. Our swap chain images are single images, so the number of layers is 1
         framebufferInfo.layers = 1;
 
         if (vkCreateFramebuffer(logicalDevice, &framebufferInfo, nullptr, &framebuffers[i]) != VK_SUCCESS) {
@@ -884,6 +1059,11 @@ void Renderer::RecordComputeCommandBuffer() {
     vkCmdBindDescriptorSets(computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipelineLayout, 1, 1, &timeDescriptorSet, 0, nullptr);
 
     // TODO: For each group of blades bind its descriptor set and dispatch
+    for (int i = 0; i < scene->GetBlades().size(); ++i) {
+        vkCmdBindDescriptorSets(computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipelineLayout, 2, 1, &computeDescriptorSets[i], 0, nullptr);
+        // WORKGROUP_SIZE should set to be the same at compute.comp as well
+        vkCmdDispatch(computeCommandBuffer, NUM_BLADES / WORKGROUP_SIZE, 1, 1);
+    }
 
     // ~ End recording ~
     if (vkEndCommandBuffer(computeCommandBuffer) != VK_SUCCESS) {
@@ -898,6 +1078,8 @@ void Renderer::RecordCommandBuffers() {
     VkCommandBufferAllocateInfo allocInfo = {};
     allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
     allocInfo.commandPool = graphicsCommandPool;
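+    // VK_COMMAND_BUFFER_LEVEL_PRIMARY: can be submitted to a queue for execution, but cannot be called from other command buffers.
+    // VK_COMMAND_BUFFER_LEVEL_SECONDARY: cannot be submitted directly, but can be called from primary command buffers.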
     allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
     allocInfo.commandBufferCount = static_cast<uint32_t>(commandBuffers.size());
 
@@ -909,7 +1091,11 @@ void Renderer::RecordCommandBuffers() {
     for (size_t i = 0; i < commandBuffers.size(); i++) {
         VkCommandBufferBeginInfo beginInfo = {};
         beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
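+        // VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT: the command buffer will be re-recorded right after executing it once.
+        // VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT: this is a secondary command buffer that will be used entirely within a single render pass.
+        // VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT: the command buffer can be resubmitted while it is still executing.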
         beginInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
+        // Only relevant for secondary command buffers
         beginInfo.pInheritanceInfo = nullptr;
 
         // ~ Start recording ~
@@ -948,6 +1134,8 @@ void Renderer::RecordCommandBuffers() {
         // Bind the camera descriptor set. This is set 0 in all pipelines so it will be inherited
         vkCmdBindDescriptorSets(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipelineLayout, 0, 1, &cameraDescriptorSet, 0, nullptr);
 
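+        // VK_SUBPASS_CONTENTS_INLINE: the render pass commands are embedded in the primary command buffer itself and no secondary command buffers are executed
+        // VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS: the render pass commands are executed from secondary command buffers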
         vkCmdBeginRenderPass(commandBuffers[i], &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE);
 
         // Bind the graphics pipeline
@@ -976,13 +1164,14 @@ void Renderer::RecordCommandBuffers() {
             VkBuffer vertexBuffers[] = { scene->GetBlades()[j]->GetCulledBladesBuffer() };
             VkDeviceSize offsets[] = { 0 };
             // TODO: Uncomment this when the buffers are populated
-            // vkCmdBindVertexBuffers(commandBuffers[i], 0, 1, vertexBuffers, offsets);
+            vkCmdBindVertexBuffers(commandBuffers[i], 0, 1, vertexBuffers, offsets);
 
             // TODO: Bind the descriptor set for each grass blades model
+            vkCmdBindDescriptorSets(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, grassPipelineLayout, 1, 1, &grassDescriptorSets[j], 0, nullptr);
 
             // Draw
             // TODO: Uncomment this when the buffers are populated
-            // vkCmdDrawIndirect(commandBuffers[i], scene->GetBlades()[j]->GetNumBladesBuffer(), 0, 1, sizeof(BladeDrawIndirect));
+            vkCmdDrawIndirect(commandBuffers[i], scene->GetBlades()[j]->GetNumBladesBuffer(), 0, 1, sizeof(BladeDrawIndirect));
         }
 
         // End render pass
@@ -1057,6 +1246,7 @@ Renderer::~Renderer() {
     vkDestroyDescriptorSetLayout(logicalDevice, cameraDescriptorSetLayout, nullptr);
     vkDestroyDescriptorSetLayout(logicalDevice, modelDescriptorSetLayout, nullptr);
     vkDestroyDescriptorSetLayout(logicalDevice, timeDescriptorSetLayout, nullptr);
+    vkDestroyDescriptorSetLayout(logicalDevice, computeDescriptorSetLayout, nullptr); // destroyed together with the other descriptor set layouts
 
     vkDestroyDescriptorPool(logicalDevice, descriptorPool, nullptr);
 
diff --git a/src/Renderer.h b/src/Renderer.h
index 95e025f..d3a8756 100644
--- a/src/Renderer.h
+++ b/src/Renderer.h
@@ -56,12 +56,15 @@ class Renderer {
     VkDescriptorSetLayout cameraDescriptorSetLayout;
     VkDescriptorSetLayout modelDescriptorSetLayout;
     VkDescriptorSetLayout timeDescriptorSetLayout;
+    VkDescriptorSetLayout computeDescriptorSetLayout; // destroyed in ~Renderer()
     
     VkDescriptorPool descriptorPool;
 
     VkDescriptorSet cameraDescriptorSet;
     std::vector<VkDescriptorSet> modelDescriptorSets;
+    std::vector<VkDescriptorSet> grassDescriptorSets; // indexed per blade group; bound at set index 1 when drawing grass
     VkDescriptorSet timeDescriptorSet;
+    std::vector<VkDescriptorSet> computeDescriptorSets; // NOTE(review): presumably one set per blade group for the compute pass - confirm
 
     VkPipelineLayout graphicsPipelineLayout;
     VkPipelineLayout grassPipelineLayout;
diff --git a/src/SwapChain.cpp b/src/SwapChain.cpp
index 711fec0..41642b3 100644
--- a/src/SwapChain.cpp
+++ b/src/SwapChain.cpp
@@ -1,4 +1,4 @@
-#include <vector>
+﻿#include <vector>
 #include "SwapChain.h"
 #include "Instance.h"
 #include "Device.h"
@@ -79,11 +79,18 @@ void SwapChain::Create() {
 
     const auto& surfaceCapabilities = instance->GetSurfaceCapabilities();
 
+    // Choose the best surface format, presentation mode and swap extent.
+    // Format: prefer VK_FORMAT_B8G8R8A8_SRGB + VK_COLORSPACE_SRGB_NONLINEAR_KHR
+    // Presentation mode: prefer VK_PRESENT_MODE_MAILBOX_KHR (vertical sync with reduced latency)
+    // Swap extent: essentially matches the screen/window resolution
     VkSurfaceFormatKHR surfaceFormat = chooseSwapSurfaceFormat(instance->GetSurfaceFormats());
     VkPresentModeKHR presentMode = chooseSwapPresentMode(instance->GetPresentModes());
     VkExtent2D extent = chooseSwapExtent(surfaceCapabilities, GetGLFWWindow());
 
+    // Sticking to the minimum means we may sometimes have to wait for the driver to finish internal
+    // operations before we can acquire another image to render to, so request one more image than the minimum
     uint32_t imageCount = surfaceCapabilities.minImageCount + 1;
+    // Use at least numBuffers images
     imageCount = numBuffers > imageCount ? numBuffers : imageCount;
     if (surfaceCapabilities.maxImageCount > 0 && imageCount > surfaceCapabilities.maxImageCount) {
         imageCount = surfaceCapabilities.maxImageCount;
@@ -101,10 +108,13 @@ void SwapChain::Create() {
     createInfo.imageFormat = surfaceFormat.format;
     createInfo.imageColorSpace = surfaceFormat.colorSpace;
     createInfo.imageExtent = extent;
+    // Number of layers each image consists of; always 1 unless developing a stereoscopic 3D application
     createInfo.imageArrayLayers = 1;
-    createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+    // What the swap chain images are used for: they are render targets, i.e. color attachments (depth/stencil usage is not guaranteed for presentable images)
+    createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
 
     const auto& queueFamilyIndices = instance->GetQueueFamilyIndices();
+    // Choose the image sharing mode depending on whether graphics and present use different queue families
     if (queueFamilyIndices[QueueFlags::Graphics] != queueFamilyIndices[QueueFlags::Present]) {
         // Images can be used across multiple queue families without explicit ownership transfers
         createInfo.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
@@ -126,6 +136,8 @@ void SwapChain::Create() {
     createInfo.preTransform = surfaceCapabilities.currentTransform;
 
     // Specify alpha channel usage (set to be ignored here)
+    // compositeAlpha specifies whether the alpha channel is used for blending
+    // with other windows in the window system; OPAQUE means it is ignored
     createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
 
     // Specify presentation mode
     // Specify presentation mode
@@ -135,6 +147,10 @@ void SwapChain::Create() {
     createInfo.clipped = VK_TRUE;
 
     // Reference to old swap chain in case current one becomes invalid
+    // While a Vulkan application runs, the swap chain can become invalid or suboptimal, e.g. when the window is resized.
+    // In that case the swap chain has to be recreated from scratch, and a reference to the old one must be given in this field.
+    // To be changed later; for now the window size is assumed not to change.
+    // TODO: pass the old swap chain here once recreation is supported
     createInfo.oldSwapchain = VK_NULL_HANDLE;
 
     // Create swap chain
     // Create swap chain
@@ -143,6 +159,7 @@ void SwapChain::Create() {
     }
 
     // --- Retrieve swap chain images ---
+    // Get the handles of all VkImages in the swap chain; rendering operations will reference these handles
     vkGetSwapchainImagesKHR(device->GetVkDevice(), vkSwapChain, &imageCount, nullptr);
     vkSwapChainImages.resize(imageCount);
     vkGetSwapchainImagesKHR(device->GetVkDevice(), vkSwapChain, &imageCount, vkSwapChainImages.data());
diff --git a/src/Window.cpp b/src/Window.cpp
index a365dc9..1d754b0 100644
--- a/src/Window.cpp
+++ b/src/Window.cpp
@@ -1,4 +1,4 @@
-#include <stdio.h>
+﻿#include <stdio.h>
 #include "Window.h"
 
 namespace {
@@ -10,6 +10,7 @@ GLFWwindow* GetGLFWWindow() {
 }
 
 void InitializeWindow(int width, int height, const char* name) {
+    // glfwInit() initializes the GLFW library; exit if that fails
     if (!glfwInit()) {
         fprintf(stderr, "Failed to initialize GLFW\n");
         exit(EXIT_FAILURE);
@@ -20,7 +21,10 @@ void InitializeWindow(int width, int height, const char* name) {
         exit(EXIT_FAILURE);
     }
 
+    // GLFW was originally designed to create an OpenGL context, so tell it
+    // not to create one in the subsequent window creation call.
     glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
+    // Create the window
     window = glfwCreateWindow(width, height, name, nullptr, nullptr);
 
     if (!window) {
diff --git a/src/main.cpp b/src/main.cpp
index 8bf822b..b99df68 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,10 +1,11 @@
-#include <vulkan/vulkan.h>
+﻿#include <vulkan/vulkan.h>
 #include "Instance.h"
 #include "Window.h"
 #include "Renderer.h"
 #include "Camera.h"
 #include "Scene.h"
 #include "Image.h"
+#include <iostream>
 
 Device* device;
 SwapChain* swapChain;
@@ -67,27 +68,44 @@ namespace {
 
 int main() {
     static constexpr char* applicationName = "Vulkan Grass Rendering";
-    InitializeWindow(640, 480, applicationName);
+    // Window size; previously 640 x 480 (same 4:3 aspect ratio)
+    InitializeWindow(1280, 960, applicationName);
 
     unsigned int glfwExtensionCount = 0;
     const char** glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount);
 
+    // Vulkan is a platform-agnostic API and has no facilities for creating a window to display the rendered results.
+    // Pass the extensions GLFW requires when creating the instance.
     Instance* instance = new Instance(applicationName, glfwExtensionCount, glfwExtensions);
 
+    // Vulkan is platform agnostic, so it cannot interface with the window system directly.
+    // To connect Vulkan to the window system and present results on screen,
+    // we need the WSI (Window System Integration) extensions, i.e. VK_KHR_surface,
+    // which is included in the list returned by glfwGetRequiredInstanceExtensions.
+    // The window surface must be created right after the instance because it can influence physical device selection.
     VkSurfaceKHR surface;
+    // glfwCreateWindowSurface takes care of the platform differences
     if (glfwCreateWindowSurface(instance->GetVkInstance(), GetGLFWWindow(), nullptr, &surface) != VK_SUCCESS) {
         throw std::runtime_error("Failed to create window surface");
     }
 
+    // Pick the physical device: choose which queue families and which device extensions it must support.
+    // Note: these are extensions the device supports (e.g. swap chain support is checked here), as opposed to the extensions the instance loaded above.
+    // The instance's queueFamilyIndices are determined in this step; they record which queue family each required queue belongs to.
     instance->PickPhysicalDevice({ VK_KHR_SWAPCHAIN_EXTENSION_NAME }, QueueFlagBit::GraphicsBit | QueueFlagBit::TransferBit | QueueFlagBit::ComputeBit | QueueFlagBit::PresentBit, surface);
 
+    // The set of device features we are going to use
     VkPhysicalDeviceFeatures deviceFeatures = {};
     deviceFeatures.tessellationShader = VK_TRUE;
     deviceFeatures.fillModeNonSolid = VK_TRUE;
     deviceFeatures.samplerAnisotropy = VK_TRUE;
 
+    // After picking the physical device, set up a logical device to interface with it.
+    // The queues that come with the logical device are created along with it.
     device = instance->CreateDevice(QueueFlagBit::GraphicsBit | QueueFlagBit::TransferBit | QueueFlagBit::ComputeBit | QueueFlagBit::PresentBit, deviceFeatures);
 
+    // Vulkan has no concept of a default framebuffer, so an infrastructure called the swap chain owns the framebuffers.
+    // We render into those framebuffers and they are eventually shown on screen.
     swapChain = device->CreateSwapChain(surface, 5);
 
     camera = new Camera(device, 640.f / 480.f);
@@ -143,10 +161,30 @@ int main() {
     glfwSetMouseButtonCallback(GetGLFWWindow(), mouseDownCallback);
     glfwSetCursorPosCallback(GetGLFWWindow(), mouseMoveCallback);
 
+    int count = 0;                    // frames accumulated in the current 1000-frame window
+    double timeFor1000Frames = 0.0;   // seconds spent in renderer->Frame() during the current window
+    double FPS = 0.0;                 // sum of the per-window FPS samples
+    int count2 = 0;                   // number of FPS samples collected so far
     while (!ShouldQuit()) {
+        // glfwPollEvents checks whether any events were triggered, updates the window state
+        // and invokes the corresponding callbacks (which can be registered manually)
         glfwPollEvents();
         scene->UpdateTime();
+        double time1 = glfwGetTime(); // returns time in seconds
         renderer->Frame();
+        double time2 = glfwGetTime();
+        timeFor1000Frames += (time2 - time1);
+        ++count;
+        if (count == 1000) {
+            FPS += (1000.0 / timeFor1000Frames);
+            timeFor1000Frames = 0.0;
+            count = 0;
+            ++count2;
+        }
+        if (count2 == 5) {
+            std::cout << FPS / 5.0 << std::endl; // average over the 5 collected samples
+            count2 = 6;                          // move past 5 so the average is printed only once
+        }
     }
 
     vkDeviceWaitIdle(device->GetVkDevice());
diff --git a/src/shaders/compute.comp b/src/shaders/compute.comp
index 0fd0224..108c891 100644
--- a/src/shaders/compute.comp
+++ b/src/shaders/compute.comp
@@ -2,6 +2,11 @@
 #extension GL_ARB_separate_shader_objects : enable
 
 #define WORKGROUP_SIZE 32
+#define APPLY_FORCE 1       // 1: run the force simulation and write v1/v2 back to the blade buffer
+#define ORIENTATION_CULL 0  // 1: enable the orientation culling test
+#define VIEWFRUSTUM_CULL 0  // 1: enable the view-frustum culling test
+#define DISTANCE_CULL 1     // 1: enable the distance culling test
+
 layout(local_size_x = WORKGROUP_SIZE, local_size_y = 1, local_size_z = 1) in;
 
 layout(set = 0, binding = 0) uniform CameraBufferObject {
@@ -36,21 +41,143 @@ struct Blade {
 // 	  uint firstInstance; // = 0
 // } numBlades;
 
+layout(set = 2, binding = 0) buffer Blades{
+    Blade blades[];     // all blades; each invocation reads one entry and, when APPLY_FORCE is set, writes v1/v2 back
+};
+
+layout(set = 2, binding = 1) buffer Culls{
+    Blade culls[];      // compacted output: the blades that were not culled
+};
+
+layout(set = 2, binding = 2) buffer NumBlades{
+    uint vertexCount;   // Write the number of blades remaining here
+ 	uint instanceCount; // = 1
+ 	uint firstVertex;   // = 0
+ 	uint firstInstance; // = 0
+} numBlades;
+
 bool inBounds(float value, float bounds) {
     return (value >= -bounds) && (value <= bounds);
 }
 
+vec3 getWindDir(vec3 pos, float windStrength){  // returns the wind vector (direction * windStrength); pos is currently unused
+    vec3 dir = vec3(cos(totalTime), 0, sin(totalTime));  // unit direction in the XZ plane, rotating with totalTime
+    return dir * windStrength;
+}
+
 void main() {
 	// Reset the number of blades to 0
 	if (gl_GlobalInvocationID.x == 0) {
-		// numBlades.vertexCount = 0;
+		numBlades.vertexCount = 0;
 	}
 	barrier(); // Wait till all threads reach this point
 
+    Blade currBlade = blades[gl_GlobalInvocationID.x];  //one blade per invocation
+    vec3 v0 = vec3(currBlade.v0);
+    vec3 v1 = vec3(currBlade.v1);
+    vec3 v2 = vec3(currBlade.v2);
+    vec3 up = vec3(currBlade.up);
+    float dirAng = currBlade.v0.w;   //orientation angle packed into v0.w
+    vec3 dir = vec3(cos(dirAng), 0, sin(dirAng));
+    float height = currBlade.v1.w;   //blade height packed into v1.w
+    float s = currBlade.up.w;        //scale of the recovery force packed into up.w
+#if APPLY_FORCE
     // TODO: Apply forces on every blade and update the vertices in the buffer
+    //environmental parameters to define
+    float gravity = 9.8;
+    float windStrength = 10;
+    
+    //gravity: environmental part (straight down) plus a quarter-strength part along dir
+    vec3 gE = vec3(0, -1, 0) * gravity;
+    vec3 gF = gravity * 0.25 * dir;
+    vec3 g = gE + gF;
+
+    //recovery: pulls v2 back towards the rest position straight above v0, scaled by s
+    vec3 Iv2 = v0 + up * height;
+    vec3 r = (Iv2 - v2) * s;
+
+    //wind: weighted by directional alignment (fd) and by how upright the blade is (fr)
+    vec3 windDir = getWindDir(v0, windStrength);
+    float fd = 1 - abs(dot(normalize(windDir), normalize(v2 - v0)));
+    float fr = dot(v2 - v0, up) / height;
+    vec3 w = fd * fr * windDir;
+
+    //total force, applied as a displacement of v2 for this time step
+    vec3 Tv2 = (g + r + w) * deltaTime;
+    
+    //update
+    v2 = v2 + Tv2;
+    v2.y = max(v2.y, 0.f);  //keep the tip from going below y = 0
+    float Lproj = length(v2 - v0 - up * dot((v2 - v0), up));
+    v1 = v0 + height * up * max(1 - (Lproj / height), 0.05 * max(Lproj / height, 1));
+    //length correction: estimate the curve length L and rescale both segments so it matches height
+    float L0 = length(v0 - v2);
+    float L1 = length(v2 - v1) + length(v1 - v0);
+    int degree = 2;  //bezier curve degree
+    float L = (2 * L0 + (degree - 1) * L1) / (degree + 1);
+    float R = height / L;
+    vec3 v1ToV2 = v2 - v1;  //take the second segment before v1 is rescaled
+    v1 = v0 + R * (v1 - v0);
+    v2 = v1 + R * v1ToV2;
+
+    blades[gl_GlobalInvocationID.x].v1.xyz = v1.xyz;
+    blades[gl_GlobalInvocationID.x].v2.xyz = v2.xyz;
+#endif
 
 	// TODO: Cull blades that are too far away or not in the camera frustum and write them
 	// to the culled blades buffer
 	// Note: to do this, you will need to use an atomic operation to read and update numBlades.vertexCount
 	// You want to write the visible blades to the buffer without write conflicts between threads
-}
+
+    //parameter to define
+    float orientationTolerance = 0.6;
+    float tolerance = 0.0001;   //tolerance parameter for view-frustum culling
+    int n = 10;    //distance culling number of buckets
+    float dmax = 40;  //distance culling max distance
+
+#if ORIENTATION_CULL
+    //orientation culling: compare the blade direction with the horizontal direction
+    //from the blade to the camera (relative to v0, not the raw camera position)
+    vec3 toCamXZ = vec3(inverse(camera.view)[3]) - v0;
+    toCamXZ.y = 0;
+    vec3 toCamXZNor = normalize(toCamXZ);
+    if(abs(dot(toCamXZNor, dir)) > orientationTolerance){
+        return;
+    }
+#endif
+#if VIEWFRUSTUM_CULL
+    //view-frustum culling: test v0, v2 and the curve midpoint m in clip space;
+    //the blade is culled only when all three points are outside the frustum
+    vec3 m = 0.25 * v0 + 0.5 * v1 + 0.25 * v2;
+    mat4 viewPro = camera.proj * camera.view;
+    vec4 m_prime = viewPro * vec4(m, 1);
+    vec4 v0_prime = viewPro * vec4(v0, 1);
+    vec4 v2_prime = viewPro * vec4(v2, 1);
+    float h = abs(m_prime.w) + tolerance;
+    bool mVisible = inBounds(m_prime.x, h) && inBounds(m_prime.y, h) && inBounds(m_prime.z, h);
+    h = abs(v0_prime.w) + tolerance;
+    bool v0Visible = inBounds(v0_prime.x, h) && inBounds(v0_prime.y, h) && inBounds(v0_prime.z, h);
+    h = abs(v2_prime.w) + tolerance;
+    bool v2Visible = inBounds(v2_prime.x, h) && inBounds(v2_prime.y, h) && inBounds(v2_prime.z, h);
+    if(!(mVisible || v0Visible || v2Visible)){
+        return;
+    }
+    
+#endif
+#if DISTANCE_CULL
+    //distance culling: blades fall into n buckets by invocation index; the larger the
+    //camera distance dproj (measured perpendicular to up), the fewer buckets survive,
+    //and nothing survives beyond dmax
+    vec3 cameraPos = vec3(inverse(camera.view)[3]);
+    float dproj = length(v0 - cameraPos - up*(dot(v0 - cameraPos, up)));
+    if((gl_GlobalInvocationID.x % n) > (n * (1 - (dproj / dmax)))){
+        return;
+    }
+#endif
+
+    //append the surviving blade: atomicAdd returns the previous count, which gives
+    //every surviving invocation its own slot in culls[]
+    uint vertCountUpdated = atomicAdd(numBlades.vertexCount, 1);
+    culls[vertCountUpdated] = blades[gl_GlobalInvocationID.x];
+
+}
\ No newline at end of file
diff --git a/src/shaders/graphics.frag b/src/shaders/graphics.frag
index 5f15861..cbf28d0 100644
--- a/src/shaders/graphics.frag
+++ b/src/shaders/graphics.frag
@@ -10,4 +10,6 @@ layout(location = 0) out vec4 outColor;
 
 void main() {
     outColor = texture(texSampler, fragTexCoord);
+    // Debug: uncomment the next line to output the fragment depth (gl_FragCoord.z) as a gray value
+    //outColor = vec4(vec3(gl_FragCoord.z), 1.0);
 }
diff --git a/src/shaders/grass.frag b/src/shaders/grass.frag
index c7df157..e5c8ea9 100644
--- a/src/shaders/grass.frag
+++ b/src/shaders/grass.frag
@@ -7,11 +7,21 @@ layout(set = 0, binding = 0) uniform CameraBufferObject {
 } camera;
 
 // TODO: Declare fragment shader inputs
+layout(location = 0) in vec3 in_pos;   // fragment position, used to compute the direction to the light
+layout(location = 1) in vec3 in_nor;   // surface normal, used for the diffuse term
 
 layout(location = 0) out vec4 outColor;
 
 void main() {
     // TODO: Compute fragment color
+    vec3 albedo = vec3(0.56, 0.93, 0.56);
+    // Fixed point light position
+    vec3 lightPos = vec3(10, 20, 10);
+    vec3 toLight = normalize(lightPos - in_pos);
+    vec3 nor = normalize(in_nor);            // renormalize after interpolation
+    float lambert = abs(dot(toLight, nor));  // two-sided: a blade is visible from both sides, so never let the diffuse term go negative
 
-    outColor = vec4(1.0);
+    vec3 ambient = vec3(0.18, 0.31, 0.18);
+    // Final color = constant ambient term + Lambert diffuse term
+    outColor = vec4(ambient + (lambert * albedo), 1);
 }
diff --git a/src/shaders/grass.tesc b/src/shaders/grass.tesc
index f9ffd07..adf2b7a 100644
--- a/src/shaders/grass.tesc
+++ b/src/shaders/grass.tesc
@@ -9,18 +9,34 @@ layout(set = 0, binding = 0) uniform CameraBufferObject {
 } camera;
 
 // TODO: Declare tessellation control shader inputs and outputs
+layout(location = 0) in vec4 in_v0[];   // per-blade v0 from the vertex stage
+layout(location = 1) in vec4 in_v1[];   // per-blade v1 from the vertex stage
+layout(location = 2) in vec4 in_v2[];   // per-blade v2 from the vertex stage
+layout(location = 3) in vec4 in_up[];   // per-blade up vector from the vertex stage
+
+layout(location = 0) out vec4 out_v0[]; // passed through unchanged in main()
+layout(location = 1) out vec4 out_v1[];
+layout(location = 2) out vec4 out_v2[];
+layout(location = 3) out vec4 out_up[];
 
 void main() {
 	// Don't move the origin location of the patch
     gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
 
 	// TODO: Write any shader outputs
+    // gl_InvocationID identifies which patch vertex this invocation handles; the blade data is passed through unchanged
+    out_v0[gl_InvocationID] = in_v0[gl_InvocationID];
+    out_v1[gl_InvocationID] = in_v1[gl_InvocationID];
+    out_v2[gl_InvocationID] = in_v2[gl_InvocationID];
+    out_up[gl_InvocationID] = in_up[gl_InvocationID];
 
 	// TODO: Set level of tesselation
-    // gl_TessLevelInner[0] = ???
-    // gl_TessLevelInner[1] = ???
-    // gl_TessLevelOuter[0] = ???
-    // gl_TessLevelOuter[1] = ???
-    // gl_TessLevelOuter[2] = ???
-    // gl_TessLevelOuter[3] = ???
+    // Tessellation level of each blade; the same constant is used for every inner and outer level.
+    // With a level of 1 the blade would not be subdivided at all.
+    gl_TessLevelInner[0] = 8;
+    gl_TessLevelInner[1] = 8;
+    gl_TessLevelOuter[0] = 8;
+    gl_TessLevelOuter[1] = 8;
+    gl_TessLevelOuter[2] = 8;
+    gl_TessLevelOuter[3] = 8;
 }
diff --git a/src/shaders/grass.tese b/src/shaders/grass.tese
index 751fff6..f9ef69e 100644
--- a/src/shaders/grass.tese
+++ b/src/shaders/grass.tese
@@ -9,10 +9,52 @@ layout(set = 0, binding = 0) uniform CameraBufferObject {
 } camera;
 
 // TODO: Declare tessellation evaluation shader inputs and outputs
+layout(location = 0) in vec4 in_v0[];   // xyz: control point v0, w: orientation angle
+layout(location = 1) in vec4 in_v1[];   // xyz: control point v1
+layout(location = 2) in vec4 in_v2[];   // xyz: control point v2, w: width offset applied on each side of the curve
+
+layout(location = 0) out vec3 out_pos;  // generated vertex position (before the view/projection transform)
+layout(location = 1) out vec3 out_nor;  // generated vertex normal
+
 
 void main() {
+    // gl_TessCoord: the uv coordinates of the current vertex within the patch
     float u = gl_TessCoord.x;
     float v = gl_TessCoord.y;
 
 	// TODO: Use u and v to parameterize along the grass blade and output positions for each vertex of the grass blade
+    vec3 v0 = vec3(in_v0[0]);
+    vec3 v1 = vec3(in_v1[0]);
+    vec3 v2 = vec3(in_v2[0]);
+
+    float dir = in_v0[0].w;   // orientation angle of the blade
+    vec3 dirVec = normalize(vec3(cos(dir), 0, sin(dir))); 
+    vec3 t1 = dirVec;         // direction along which the blade is widened
+
+    // Disabled alternative: snap t1 to +X or -X so that all blades line up along the X axis
+    //float dirDot = dot(dirVec, vec3(1, 0, 0));
+    //if(dirDot > 0){
+    //    t1 = vec3(1, 0, 0);
+    //}else{
+    //    t1 = vec3(-1, 0, 0);
+    //}
+
+    float w = in_v2[0].w;     // offset applied on each side of the curve
+    // De Casteljau evaluation of the quadratic Bezier curve (v0, v1, v2) at parameter v
+    vec3 a = v0 + v * (v1 - v0);
+    vec3 b = v1 + v * (v2 - v1);
+    vec3 c = a + v * (b - a);
+    vec3 c0 = c - w * t1;
+    vec3 c1 = c + w * t1;
+    vec3 t0 = normalize(b - a);
+    vec3 n = normalize(cross(t0, t1));
+
+    // Quadratic blade shape: t equals u at the base (v = 0) and 0 at the tip (v = 1), so the blade narrows towards the tip
+    float t = u - (u * v * v);
+    vec3 p = (1 - t) * c0 + t * c1;
+   
+    out_pos = p;
+    out_nor = n;
+
+    gl_Position = camera.proj * camera.view * vec4(p, 1.0f);
 }
diff --git a/src/shaders/grass.vert b/src/shaders/grass.vert
index db9dfe9..62bbeba 100644
--- a/src/shaders/grass.vert
+++ b/src/shaders/grass.vert
@@ -7,6 +7,18 @@ layout(set = 1, binding = 0) uniform ModelBufferObject {
 };
 
 // TODO: Declare vertex shader inputs and outputs
+// Descriptor set layouts: camera, model
+// Vertex input: the attributes of one Blade
+
+layout(location = 0) in vec4 in_v0;  //v0
+layout(location = 1) in vec4 in_v1;  //v1
+layout(location = 2) in vec4 in_v2;  //v2
+layout(location = 3) in vec4 in_v3;  //up
+
+layout(location = 0) out vec4 out_v0;  // model-transformed xyz, original w
+layout(location = 1) out vec4 out_v1;
+layout(location = 2) out vec4 out_v2;
+layout(location = 3) out vec4 out_v3;
 
 out gl_PerVertex {
     vec4 gl_Position;
@@ -14,4 +26,16 @@ out gl_PerVertex {
 
 void main() {
 	// TODO: Write gl_Position and any other shader outputs
+    out_v0 = model * vec4(vec3(in_v0), 1);   // positions use w = 1 so the model translation applies
+    gl_Position = out_v0;
+    out_v0.w = in_v0.w;                      // restore the per-blade attribute packed into w
+
+    out_v1 = model * vec4(vec3(in_v1), 1);
+    out_v1.w = in_v1.w;
+
+    out_v2 = model * vec4(vec3(in_v2), 1);
+    out_v2.w = in_v2.w;
+    // up is a direction, not a position: transform it with w = 0 so the model translation is not added
+    out_v3 = model * vec4(vec3(in_v3), 0);
+    out_v3.w = in_v3.w;
 }