Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 20 additions & 16 deletions apps/pre-processing-service/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
FROM python:3.11-slim AS builder
WORKDIR /app

# 필수 OS 패키지 (기존 + Chrome 설치용 패키지 추가)
# 필수 OS 패키지 설치
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
wget \
unzip \
gnupg \
ca-certificates \
build-essential \
&& rm -rf /var/lib/apt/lists/*

# Poetry 설치
Expand All @@ -20,16 +18,15 @@ RUN poetry self add "poetry-plugin-export>=1.7.0"
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# 의존성 해결 → requirements로 export → pip로 설치(= 반드시 /opt/venv에 설치됨)
# poetry → requirements로 export → pip로 설치
COPY pyproject.toml poetry.lock ./
RUN poetry export --without dev -f requirements.txt -o requirements.txt \
&& pip install --no-cache-dir -r requirements.txt

# ---- runtime ----
FROM python:3.11-slim AS final
WORKDIR /app

# Chrome과 ChromeDriver 설치를 위한 패키지 설치
# Chrome과 ChromeDriver 설치를 위한 패키지 설치 (삭제 예정 - 마운트 방식)
RUN apt-get update && apt-get install -y --no-install-recommends \
wget \
unzip \
Expand All @@ -38,30 +35,37 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*

# Chrome 설치 (블로그 방식 - 직접 .deb 파일 다운로드)
# Chrome 설치 (삭제 예정 - 마운트 방식)
# Download the Google Chrome .deb and install it through apt so that the
# package's dependencies are resolved from the Debian repositories. The .deb
# and the apt package lists are removed in the same layer so they do not add
# to the image size.
# --no-install-recommends matches every other apt-get install in this file and
# keeps optional "Recommends" packages out of the runtime image.
# NOTE(review): the "stable_current" URL is not version-pinned, so rebuilding
# the image can pull a different Chrome release - confirm this is intended.
RUN wget -q https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
    && apt-get update \
    && apt-get install -y --no-install-recommends ./google-chrome-stable_current_amd64.deb \
    && rm ./google-chrome-stable_current_amd64.deb \
    && rm -rf /var/lib/apt/lists/*

# MeCab & 사전 설치 (형태소 분석 의존)
# MeCab & 사전 설치 (삭제 예정 - 마운트 방식)
# Install MeCab, its development package and the UTF-8 IPA dictionary, plus
# wget and build-essential, which the mecab-ko-dic build step below relies on
# (it runs wget, ./configure and make). Packages are listed alphabetically.
# The apt package lists are dropped in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libmecab-dev \
        mecab \
        mecab-ipadic-utf8 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# 한국어 사전 수동 설치 (삭제 예정 - 마운트 방식)
# Build and install the mecab-ko-dic 2.1.1-20180720 dictionary from its source
# tarball. `set -e` aborts the whole step as soon as any command fails, and
# the tarball and extracted tree are deleted from /tmp in this same layer.
RUN set -e; \
    cd /tmp; \
    wget https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz; \
    tar -zxf mecab-ko-dic-2.1.1-20180720.tar.gz; \
    cd mecab-ko-dic-2.1.1-20180720; \
    ./configure; \
    make; \
    make install; \
    cd /; \
    rm -rf /tmp/mecab-ko-dic-*

# /opt/venv 복사
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# 앱 소스
COPY . .


# 환경변수로 MeCab 경로 지정
ENV MECAB_PATH=/usr/lib/mecab/dic/ipadic

# (권장 대안) 코드에서 uvicorn import 안 하고 프로세스 매니저로 실행하려면:
ENTRYPOINT ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "app.main:app", "-b", "0.0.0.0:8000", "--timeout", "120"]
# gunicorn으로 FastAPI 앱 실행 - 타임아웃 240초 설정
ENTRYPOINT ["/opt/venv/bin/gunicorn", "-k", "uvicorn.workers.UvicornWorker", "app.main:app", "-b", "0.0.0.0:8000", "--timeout", "240"]
17 changes: 17 additions & 0 deletions apps/pre-processing-service/app/api/endpoints/blog.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,27 @@
from app.utils.response import Response
from app.service.blog.blog_create_service import BlogContentService
from app.service.blog.blog_publish_service import BlogPublishService
from app.service.ocr.S3OCRProcessor import S3OCRProcessor

router = APIRouter()


# POST /ocr/extract: builds an S3OCRProcessor for the requested keyword, runs
# its image processing and wraps the outcome with Response.ok().
#
# The docstring below is left exactly as written because FastAPI publishes a
# path operation's docstring as its OpenAPI description (it reads, in English:
# "Extract and translate text from S3 images").
@router.post(
    "/ocr/extract",
    response_model=ResponseImageTextExtract,
    summary="S3 이미지에서 텍스트 추출 및 번역",
)
async def ocr_extract(request: RequestImageTextExtract):
    """
    S3 이미지에서 텍스트 추출 및 번역
    """
    # Function-scope import so this block does not depend on the module's
    # import section, which is only partially visible here.
    import asyncio

    def _run_ocr():
        # Construct and run the processor together so both happen off the
        # event loop; any exception still propagates to the caller via await.
        processor = S3OCRProcessor(request.keyword)
        return processor.process_images()

    # process_images() is invoked synchronously (it is not awaited), so calling
    # it directly inside this coroutine would stall the event loop - and every
    # other request on this worker - until it returns. Run it in a worker
    # thread instead.
    result = await asyncio.to_thread(_run_ocr)

    return Response.ok(result)


@router.post(
"/rag/create",
response_model=ResponseBlogCreate,
Expand Down
3 changes: 3 additions & 0 deletions apps/pre-processing-service/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ class BaseSettingsConfig(BaseSettings):
# 테스트/추가용 필드
OPENAI_API_KEY: Optional[str] = None # << 이 부분 추가

# OCR 번역기 설정
google_application_credentials: Optional[str] = None

def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
23 changes: 23 additions & 0 deletions apps/pre-processing-service/app/model/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,29 @@ class ResponseBlogCreate(ResponseBase[BlogCreateData]):
pass


# ================== Image text extraction and translation ==================
# Request payload for the image text-extraction endpoint.
# (Documented with comments rather than a class docstring, since a docstring
# on a pydantic model is emitted as the model description in its schema.)
class RequestImageTextExtract(RequestBase):
    # Declared with `...`, so the field must be supplied, while the
    # Optional[str] annotation still admits an explicit null.
    keyword: Optional[str] = Field(
        ...,
        title="키워드",
        description="텍스트 추출용 키워드",
    )


# Result payload of image text extraction: the keyword plus two text fields.
# (Comments are used instead of a class docstring, since a docstring on a
# pydantic model is emitted as the model description in its schema.)
class ImageTextExtract(BaseModel):
    # Must be supplied (`...`), but the Optional[str] annotation admits null.
    keyword: Optional[str] = Field(
        ...,
        title="키워드",
        description="텍스트 추출용 키워드",
    )
    # Per its title/description strings this carries the text extracted from
    # the image, despite the `_language` suffix in the field name.
    extraction_language: str = Field(
        ...,
        title="추출된 텍스트",
        description="이미지에서 추출된 텍스트",
    )
    # Per its title/description strings this carries the translation of the
    # extracted text.
    translation_language: str = Field(
        ...,
        title="번역된 텍스트",
        description="추출된 텍스트의 번역본",
    )


# Response envelope specialised to ImageTextExtract; it adds no members of its
# own beyond what ResponseBase[ImageTextExtract] provides.
class ResponseImageTextExtract(ResponseBase[ImageTextExtract]):
    ...


# ============== 블로그 배포 ==============


Expand Down
Loading
Loading