diff --git a/docs/_posts/prabod/2025-04-11-smolvlm_instruct_int4_en.md b/docs/_posts/prabod/2025-04-11-smolvlm_instruct_int4_en.md
new file mode 100644
index 00000000000000..b963b9331659a5
--- /dev/null
+++ b/docs/_posts/prabod/2025-04-11-smolvlm_instruct_int4_en.md
@@ -0,0 +1,115 @@
+---
+layout: model
+title: SmolVLM by Hugging Face
+author: John Snow Labs
+name: smolvlm_instruct_int4
+date: 2025-04-11
+tags: [en, openvino, vlm, open_source]
+task: Image Captioning
+language: en
+edition: Spark NLP 5.5.1
+spark_version: 3.0
+supported: true
+engine: openvino
+annotator: SmolVLMTransformer
+article_header:
+ type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+SmolVLM is a compact open multimodal model that accepts arbitrary sequences of image and text inputs and produces text outputs. Designed for efficiency, SmolVLM can answer questions about images, describe visual content, create stories grounded in multiple images, or function as a pure language model without visual inputs. Its lightweight architecture makes it suitable for on-device applications while maintaining strong performance on multimodal tasks.
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/smolvlm_instruct_int4_en_5.5.1_3.0_1744355673028.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/smolvlm_instruct_int4_en_5.5.1_3.0_1744355673028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
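+
+A minimal Python sketch of running this model in a Spark NLP pipeline. The `images_path` variable, the example prompt, and the active `spark` session are assumptions; adapt them to your environment.
+
+```python
+import sparknlp
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import SmolVLMTransformer
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import lit
+
+# Read images and attach a SmolVLM chat-formatted prompt to each row.
+image_df = spark.read.format("image").load(path=images_path)
+test_df = image_df.withColumn(
+    "text",
+    lit("<|im_start|>User:<image>Can you describe the image?<end_of_utterance>\nAssistant:")
+)
+
+image_assembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+vlm = SmolVLMTransformer.pretrained("smolvlm_instruct_int4", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([image_assembler, vlm])
+result = pipeline.fit(test_df).transform(test_df)
+result.select("image.origin", "answer.result").show(truncate=False)
+```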
+
+
+
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|smolvlm_instruct_int4|
+|Compatibility:|Spark NLP 5.5.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[image_assembler]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|1.8 GB|
+
+## References
+
+https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct
\ No newline at end of file
diff --git a/docs/_posts/prabod/2025-04-14-paligemma_3b_pt_224_int4_en.md b/docs/_posts/prabod/2025-04-14-paligemma_3b_pt_224_int4_en.md
new file mode 100644
index 00000000000000..218f46d77d7fb7
--- /dev/null
+++ b/docs/_posts/prabod/2025-04-14-paligemma_3b_pt_224_int4_en.md
@@ -0,0 +1,118 @@
+---
+layout: model
+title: PaliGemma 3B pt 224 int4
+author: John Snow Labs
+name: paligemma_3b_pt_224_int4
+date: 2025-04-14
+tags: [en, open_source, openvino]
+task: Image Captioning
+language: en
+edition: Spark NLP 5.5.1
+spark_version: 3.0
+supported: true
+engine: openvino
+annotator: PaliGemmaForMultiModal
+article_header:
+ type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. It takes both image and text as input and generates text as output, supporting multiple languages. It is designed for class-leading fine-tuning performance on a wide range of vision-language tasks such as image and short video captioning, visual question answering, text reading, object detection, and object segmentation.
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paligemma_3b_pt_224_int4_en_5.5.1_3.0_1744628366633.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paligemma_3b_pt_224_int4_en_5.5.1_3.0_1744628366633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
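+
+A minimal Python sketch of a Spark NLP pipeline for this model. The `images_path` variable, the `"caption en"` task prompt (one of PaliGemma's task prefixes), and the active `spark` session are assumptions; adapt them to your data.
+
+```python
+import sparknlp
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import PaliGemmaForMultiModal
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import lit
+
+# Read images and attach a PaliGemma task prompt to each row.
+image_df = spark.read.format("image").load(path=images_path)
+test_df = image_df.withColumn("text", lit("caption en"))
+
+image_assembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+vlm = PaliGemmaForMultiModal.pretrained("paligemma_3b_pt_224_int4", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([image_assembler, vlm])
+result = pipeline.fit(test_df).transform(test_df)
+result.select("image.origin", "answer.result").show(truncate=False)
+```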
+
+
+
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|paligemma_3b_pt_224_int4|
+|Compatibility:|Spark NLP 5.5.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[image_assembler]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|3.1 GB|
+
+## References
+
+https://huggingface.co/google/paligemma-3b-pt-224
\ No newline at end of file
diff --git a/docs/_posts/prabod/2025-04-15-paligemma2_3b_mix_224_int4_en.md b/docs/_posts/prabod/2025-04-15-paligemma2_3b_mix_224_int4_en.md
new file mode 100644
index 00000000000000..3787849880dc8c
--- /dev/null
+++ b/docs/_posts/prabod/2025-04-15-paligemma2_3b_mix_224_int4_en.md
@@ -0,0 +1,118 @@
+---
+layout: model
+title: PaliGemma2 3B mix 224 int4
+author: John Snow Labs
+name: paligemma2_3b_mix_224_int4
+date: 2025-04-15
+tags: [en, open_source, openvino]
+task: Image Captioning
+language: en
+edition: Spark NLP 5.5.1
+spark_version: 3.0
+supported: true
+engine: openvino
+annotator: PaliGemmaForMultiModal
+article_header:
+ type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+PaliGemma 2 is an update of the PaliGemma vision-language model (VLM) that incorporates the capabilities of the Gemma 2 models. The PaliGemma family of models is inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma 2 language models. It takes both image and text as input and generates text as output, supporting multiple languages. It is designed for class-leading fine-tuning performance on a wide range of vision-language tasks such as image and short video captioning, visual question answering, text reading, object detection, and object segmentation.
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paligemma2_3b_mix_224_int4_en_5.5.1_3.0_1744695280010.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paligemma2_3b_mix_224_int4_en_5.5.1_3.0_1744695280010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
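+
+A minimal Python sketch of a Spark NLP pipeline for this model. The `images_path` variable, the `"caption en"` task prompt (one of PaliGemma's task prefixes), and the active `spark` session are assumptions; adapt them to your data.
+
+```python
+import sparknlp
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import PaliGemmaForMultiModal
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import lit
+
+# Read images and attach a PaliGemma task prompt to each row.
+image_df = spark.read.format("image").load(path=images_path)
+test_df = image_df.withColumn("text", lit("caption en"))
+
+image_assembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+vlm = PaliGemmaForMultiModal.pretrained("paligemma2_3b_mix_224_int4", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([image_assembler, vlm])
+result = pipeline.fit(test_df).transform(test_df)
+result.select("image.origin", "answer.result").show(truncate=False)
+```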
+
+
+
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|paligemma2_3b_mix_224_int4|
+|Compatibility:|Spark NLP 5.5.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[image_assembler]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|3.0 GB|
+
+## References
+
+https://huggingface.co/google/paligemma2-3b-mix-224
\ No newline at end of file
diff --git a/docs/_posts/prabod/2025-04-15-paligemma2_3b_pt_448_int4_en.md b/docs/_posts/prabod/2025-04-15-paligemma2_3b_pt_448_int4_en.md
new file mode 100644
index 00000000000000..dbb0dc2d97fe92
--- /dev/null
+++ b/docs/_posts/prabod/2025-04-15-paligemma2_3b_pt_448_int4_en.md
@@ -0,0 +1,118 @@
+---
+layout: model
+title: PaliGemma2 3B pt 448 int4
+author: John Snow Labs
+name: paligemma2_3b_pt_448_int4
+date: 2025-04-15
+tags: [en, open_source, openvino]
+task: Image Captioning
+language: en
+edition: Spark NLP 5.5.1
+spark_version: 3.0
+supported: true
+engine: openvino
+annotator: PaliGemmaForMultiModal
+article_header:
+ type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+PaliGemma 2 is an update of the PaliGemma vision-language model (VLM) that incorporates the capabilities of the Gemma 2 models. The PaliGemma family of models is inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma 2 language models. It takes both image and text as input and generates text as output, supporting multiple languages. It is designed for class-leading fine-tuning performance on a wide range of vision-language tasks such as image and short video captioning, visual question answering, text reading, object detection, and object segmentation.
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paligemma2_3b_pt_448_int4_en_5.5.1_3.0_1744693861187.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paligemma2_3b_pt_448_int4_en_5.5.1_3.0_1744693861187.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
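+
+A minimal Python sketch of a Spark NLP pipeline for this model. The `images_path` variable, the `"caption en"` task prompt (one of PaliGemma's task prefixes), and the active `spark` session are assumptions; adapt them to your data.
+
+```python
+import sparknlp
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import PaliGemmaForMultiModal
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import lit
+
+# Read images and attach a PaliGemma task prompt to each row.
+image_df = spark.read.format("image").load(path=images_path)
+test_df = image_df.withColumn("text", lit("caption en"))
+
+image_assembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+vlm = PaliGemmaForMultiModal.pretrained("paligemma2_3b_pt_448_int4", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([image_assembler, vlm])
+result = pipeline.fit(test_df).transform(test_df)
+result.select("image.origin", "answer.result").show(truncate=False)
+```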
+
+
+
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|paligemma2_3b_pt_448_int4|
+|Compatibility:|Spark NLP 5.5.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[image_assembler]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|3.0 GB|
+
+## References
+
+https://huggingface.co/google/paligemma2-3b-pt-448
\ No newline at end of file
diff --git a/docs/_posts/prabod/2025-04-15-paligemma_3b_ft_vqav2_448_int4_en.md b/docs/_posts/prabod/2025-04-15-paligemma_3b_ft_vqav2_448_int4_en.md
new file mode 100644
index 00000000000000..4340b430e5423a
--- /dev/null
+++ b/docs/_posts/prabod/2025-04-15-paligemma_3b_ft_vqav2_448_int4_en.md
@@ -0,0 +1,118 @@
+---
+layout: model
+title: PaliGemma 3B ft vqav2 448 int4
+author: John Snow Labs
+name: paligemma_3b_ft_vqav2_448_int4
+date: 2025-04-15
+tags: [en, openvino, open_source]
+task: Image Captioning
+language: en
+edition: Spark NLP 5.5.1
+spark_version: 3.0
+supported: true
+engine: openvino
+annotator: PaliGemmaForMultiModal
+article_header:
+ type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. It takes both image and text as input and generates text as output, supporting multiple languages. It is designed for class-leading fine-tuning performance on a wide range of vision-language tasks such as image and short video captioning, visual question answering, text reading, object detection, and object segmentation.
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paligemma_3b_ft_vqav2_448_int4_en_5.5.1_3.0_1744684675991.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paligemma_3b_ft_vqav2_448_int4_en_5.5.1_3.0_1744684675991.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
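+
+A minimal Python sketch of a Spark NLP pipeline for this VQA-fine-tuned model. The `images_path` variable, the example question prompt (PaliGemma uses task prefixes such as `answer en`), and the active `spark` session are assumptions; adapt them to your data.
+
+```python
+import sparknlp
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import PaliGemmaForMultiModal
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import lit
+
+# Read images and attach a VQA-style question to each row.
+image_df = spark.read.format("image").load(path=images_path)
+test_df = image_df.withColumn("text", lit("answer en What is in the image?"))
+
+image_assembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+vlm = PaliGemmaForMultiModal.pretrained("paligemma_3b_ft_vqav2_448_int4", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([image_assembler, vlm])
+result = pipeline.fit(test_df).transform(test_df)
+result.select("image.origin", "answer.result").show(truncate=False)
+```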
+
+
+
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|paligemma_3b_ft_vqav2_448_int4|
+|Compatibility:|Spark NLP 5.5.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[image_assembler]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|3.1 GB|
+
+## References
+
+https://huggingface.co/google/paligemma-3b-ft-vqav2-448
\ No newline at end of file
diff --git a/docs/_posts/prabod/2025-04-15-paligemma_3b_pt_224_int4_en.md b/docs/_posts/prabod/2025-04-15-paligemma_3b_pt_224_int4_en.md
new file mode 100644
index 00000000000000..00fc7681434281
--- /dev/null
+++ b/docs/_posts/prabod/2025-04-15-paligemma_3b_pt_224_int4_en.md
@@ -0,0 +1,114 @@
+---
+layout: model
+title: PaliGemma 3b pt 224 int4
+author: John Snow Labs
+name: paligemma_3b_pt_224_int4
+date: 2025-04-15
+tags: [en, open_source, openvino]
+task: Image Captioning
+language: en
+edition: Spark NLP 5.5.1
+spark_version: 3.0
+supported: true
+engine: openvino
+annotator: PaliGemmaForMultiModal
+article_header:
+ type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. It takes both image and text as input and generates text as output, supporting multiple languages. It is designed for class-leading fine-tuning performance on a wide range of vision-language tasks such as image and short video captioning, visual question answering, text reading, object detection, and object segmentation.
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paligemma_3b_pt_224_int4_en_5.5.1_3.0_1744687182569.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paligemma_3b_pt_224_int4_en_5.5.1_3.0_1744687182569.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
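+
+A minimal Python sketch of a Spark NLP pipeline for this model. The `images_path` variable, the `"caption en"` task prompt (one of PaliGemma's task prefixes), and the active `spark` session are assumptions; adapt them to your data.
+
+```python
+import sparknlp
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import PaliGemmaForMultiModal
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import lit
+
+# Read images and attach a PaliGemma task prompt to each row.
+image_df = spark.read.format("image").load(path=images_path)
+test_df = image_df.withColumn("text", lit("caption en"))
+
+image_assembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+vlm = PaliGemmaForMultiModal.pretrained("paligemma_3b_pt_224_int4", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([image_assembler, vlm])
+result = pipeline.fit(test_df).transform(test_df)
+result.select("image.origin", "answer.result").show(truncate=False)
+```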
+
+
+
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|paligemma_3b_pt_224_int4|
+|Compatibility:|Spark NLP 5.5.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[image_assembler]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|3.1 GB|
\ No newline at end of file
diff --git a/docs/_posts/prabod/2025-04-28-gemma_3_4b_it_int4_en.md b/docs/_posts/prabod/2025-04-28-gemma_3_4b_it_int4_en.md
new file mode 100644
index 00000000000000..a333cf6e6d0ab9
--- /dev/null
+++ b/docs/_posts/prabod/2025-04-28-gemma_3_4b_it_int4_en.md
@@ -0,0 +1,124 @@
+---
+layout: model
+title: Gemma 3 4B IT INT4
+author: John Snow Labs
+name: gemma_3_4b_it_int4
+date: 2025-04-28
+tags: [en, open_source, openvino]
+task: Image Captioning
+language: en
+edition: Spark NLP 5.5.1
+spark_version: 3.0
+supported: true
+engine: openvino
+annotator: Gemma3ForMultiModal
+article_header:
+ type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Gemma3ForMultiModal can load Gemma 3 vision models for visual question answering.
+The model consists of a vision encoder, a text encoder, a text decoder, and a model merger.
+The vision encoder encodes the input image, the text encoder encodes the input text,
+the model merger fuses the image and text embeddings, and the text decoder generates the answer.
+
+Gemma 3 is a family of lightweight, state-of-the-art open models from Google, built from the same
+research and technology used to create the Gemini models. It features:
+- Large 128K context window
+- Multilingual support in over 140 languages
+- Multimodal capabilities handling both text and image inputs
+- Optimized for deployment on limited resources (laptops, desktops, cloud)
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_3_4b_it_int4_en_5.5.1_3.0_1745821980536.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_3_4b_it_int4_en_5.5.1_3.0_1745821980536.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
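+
+A minimal Python sketch of a Spark NLP pipeline for this model. The `images_path` variable, the Gemma 3 chat-template prompt shown below, and the active `spark` session are assumptions; adapt them to your environment.
+
+```python
+import sparknlp
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import Gemma3ForMultiModal
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import lit
+
+# Read images and attach a Gemma 3 chat-formatted prompt to each row.
+image_df = spark.read.format("image").load(path=images_path)
+test_df = image_df.withColumn(
+    "text",
+    lit("<bos><start_of_turn>user\n<start_of_image>Describe this image.<end_of_turn>\n<start_of_turn>model\n")
+)
+
+image_assembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+vlm = Gemma3ForMultiModal.pretrained("gemma_3_4b_it_int4", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([image_assembler, vlm])
+result = pipeline.fit(test_df).transform(test_df)
+result.select("image.origin", "answer.result").show(truncate=False)
+```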
+
+
+
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|gemma_3_4b_it_int4|
+|Compatibility:|Spark NLP 5.5.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[image_assembler]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|3.1 GB|
\ No newline at end of file
diff --git a/docs/_posts/prabod/2025-04-28-gemma_3_4b_pt_int4_en.md b/docs/_posts/prabod/2025-04-28-gemma_3_4b_pt_int4_en.md
new file mode 100644
index 00000000000000..c5ef93260005e5
--- /dev/null
+++ b/docs/_posts/prabod/2025-04-28-gemma_3_4b_pt_int4_en.md
@@ -0,0 +1,124 @@
+---
+layout: model
+title: Gemma 3 4B PT Int4
+author: John Snow Labs
+name: gemma_3_4b_pt_int4
+date: 2025-04-28
+tags: [en, open_source, openvino]
+task: Image Captioning
+language: en
+edition: Spark NLP 5.5.1
+spark_version: 3.0
+supported: true
+engine: openvino
+annotator: Gemma3ForMultiModal
+article_header:
+ type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Gemma3ForMultiModal can load Gemma 3 vision models for visual question answering.
+The model consists of a vision encoder, a text encoder, a text decoder, and a model merger.
+The vision encoder encodes the input image, the text encoder encodes the input text,
+the model merger fuses the image and text embeddings, and the text decoder generates the answer.
+
+Gemma 3 is a family of lightweight, state-of-the-art open models from Google, built from the same
+research and technology used to create the Gemini models. It features:
+- Large 128K context window
+- Multilingual support in over 140 languages
+- Multimodal capabilities handling both text and image inputs
+- Optimized for deployment on limited resources (laptops, desktops, cloud)
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gemma_3_4b_pt_int4_en_5.5.1_3.0_1745822550666.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gemma_3_4b_pt_int4_en_5.5.1_3.0_1745822550666.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
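+
+A minimal Python sketch of a Spark NLP pipeline for this pretrained (non-instruction-tuned) variant. The `images_path` variable, the example prompt, and the active `spark` session are assumptions; adapt them to your environment.
+
+```python
+import sparknlp
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import Gemma3ForMultiModal
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import lit
+
+# Read images and attach a prompt to each row.
+image_df = spark.read.format("image").load(path=images_path)
+test_df = image_df.withColumn(
+    "text",
+    lit("<bos><start_of_turn>user\n<start_of_image>Describe this image.<end_of_turn>\n<start_of_turn>model\n")
+)
+
+image_assembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+vlm = Gemma3ForMultiModal.pretrained("gemma_3_4b_pt_int4", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([image_assembler, vlm])
+result = pipeline.fit(test_df).transform(test_df)
+result.select("image.origin", "answer.result").show(truncate=False)
+```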
+
+
+
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|gemma_3_4b_pt_int4|
+|Compatibility:|Spark NLP 5.5.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[image_assembler]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|3.1 GB|
\ No newline at end of file