ApptiveDev · mongdmin · May 17, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/.gitignore b/.gitignore
@@ -1 +1,4 @@
 .cursor/
+
+# Environment Variables
+.env
diff --git a/myeongsung/.gitignore b/myeongsung/.gitignore
@@ -37,3 +37,10 @@ wheels/
 # SQLite
 *.db
 *.sqlite3logs/
+
+# Data and API specs
+data/
+ai_job_extraction_api_spec.md
+ai_experience_extraction_api_spec.md
+tests/results/
+/*.pdf
diff --git a/myeongsung/app/api/router.py b/myeongsung/app/api/router.py
@@ -77,8 +77,39 @@ async def analyze_image(files: List[UploadFile] = File(...)):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
-@router.post("/analyze-and-place", response_model=PlacementResponse)
+from app.schemas.resume_dto import ExperienceExtractionResponse
+from app.services.experience_extraction_service import extract_experiences_from_text, extract_experiences_from_url, extract_experiences_from_pdf
+
+@router.post("/extract-experiences", response_model=ExperienceExtractionResponse)
+async def extract_experiences(
+    file: Optional[UploadFile] = File(None, description="자소서 원문 PDF 파일"),
+    url: Optional[str] = Form(None, description="자소서 웹페이지 URL"),
+    text: Optional[str] = Form(None, description="자소서 텍스트 원문")
+):
+    """
+    자소서 원문(PDF, URL, 텍스트 중 하나)을 입력받아, 내재된 경험들을 STAR 포맷으로 구조화하여 추출합니다.
+    """
+    # The docstring above is left verbatim: FastAPI publishes an endpoint's
+    # docstring as its description, so it is user-facing text.
+    #
+    # Contract: accepts a cover letter as an uploaded file, a URL or raw text
+    # and returns the extracted experiences. When several inputs are sent the
+    # priority is file > url > text.
+    #   400 - no usable input, or a non-PDF upload that is not valid UTF-8
+    #   500 - any failure raised by the extraction services
+    from fastapi.concurrency import run_in_threadpool
+
+    # An upload part without a filename carries no usable document; treating
+    # it as "no file" keeps the request from falling through every branch
+    # below and returning None.
+    has_file = bool(file and file.filename)
+    clean_url = url.strip() if url else ""
+    clean_text = text.strip() if text else ""
+    if not (has_file or clean_url or clean_text):
+        raise HTTPException(
+            status_code=400,
+            detail="file (업로드 파일), url, text 중 최소 하나는 제공되어야 합니다."
+        )
 
+    try:
+        # The extraction services are synchronous (HTTP fetch, PDF parsing,
+        # LLM call); running them in the thread pool keeps the event loop free.
+        if has_file:
+            file_content = await file.read()
+            if file.filename.lower().endswith(".pdf"):
+                return await run_in_threadpool(extract_experiences_from_pdf, file_content)
+            try:
+                decoded = file_content.decode("utf-8")
+            except UnicodeDecodeError as e:
+                # A badly encoded upload is a client error, not a server fault.
+                raise HTTPException(
+                    status_code=400,
+                    detail="텍스트 파일은 UTF-8 인코딩이어야 합니다."
+                ) from e
+            return await run_in_threadpool(extract_experiences_from_text, decoded)
+        if clean_url:
+            return await run_in_threadpool(extract_experiences_from_url, clean_url)
+        return await run_in_threadpool(extract_experiences_from_text, clean_text)
+    except HTTPException:
+        # Let deliberate HTTP errors through instead of rewrapping them as 500.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+@router.post("/analyze-and-place", response_model=PlacementResponse)
 async def analyze_and_place(
     background_tasks: BackgroundTasks,
     jd_pdf: Optional[UploadFile] = File(None, description="채용공고 원문 PDF 파일 (업스테이지 파싱용)"),

diff --git a/myeongsung/app/schemas/resume_dto.py b/myeongsung/app/schemas/resume_dto.py
@@ -32,3 +32,25 @@ class PlacementResult(BaseModel):
 class PlacementResponse(BaseModel):
     placements: List[PlacementResult]
     errors: List[str] = []
+
+# ── Schemas for cover-letter-based experience extraction ──────────────
+# One experience candidate extracted from a cover letter, structured as
+# STAR plus learnings.
+# NOTE(review): documented with `#` comments on purpose; a class docstring
+# would presumably surface in the generated JSON schema - confirm before
+# adding one.
+class ExtractedExperience(BaseModel):
+    # Identification of the experience. `organization` and `period` are
+    # optional (default None); the other three fields are required.
+    experience_name: str = Field(..., description="경험명 (예: 경식이 AI 전화 서비스 기획)")
+    experience_type: str = Field(..., description="경험 유형 (예: 프로젝트, 인턴, 동아리, 창업, 해커톤 등)")
+    organization: Optional[str] = Field(None, description="기관/소속")
+    period: Optional[str] = Field(None, description="기간")
+    my_role: str = Field(..., description="나의 역할 (Task)")
+
+    # STAR + L: situation, action and result are required; learnings is the
+    # optional "L" part. The "T" (task) is carried by `my_role` above.
+    situation: str = Field(..., description="[S] 문제상황")
+    action: str = Field(..., description="[A] 주요 행동")
+    result: str = Field(..., description="[R] 결과/성과")
+    learnings: Optional[str] = Field(None, description="배운 점")
+
+    # Tagging and provenance: competency tags, the question types the
+    # experience can answer, and the excerpt of the original text it was
+    # extracted from. `status` defaults to "미확인" (unconfirmed).
+    core_competencies: List[str] = Field(..., description="핵심 역량 태그 (예: 문제해결, 기획력 등)")
+    applicable_questions: List[str] = Field(..., description="활용 가능 문항 (예: 문제해결 경험, 도전 경험 등)")
+    source_text: str = Field(..., description="원문 출처 (추출의 근거가 된 자소서 원본 일부)")
+    status: str = Field(default="미확인", description="상태 (미확인, 저장완료, 삭제 등)")
+
+# Response envelope of the extraction endpoint and the structured-output
+# target of the extraction chain: a required list of experience candidates.
+class ExperienceExtractionResponse(BaseModel):
+    experiences: List[ExtractedExperience] = Field(..., description="추출된 경험 후보 목록")
diff --git a/myeongsung/app/services/experience_extraction_service.py b/myeongsung/app/services/experience_extraction_service.py
@@ -0,0 +1,101 @@
+import os
+import requests
+from bs4 import BeautifulSoup
+import fitz  # PyMuPDF
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from app.schemas.resume_dto import ExperienceExtractionResponse
+
+def extract_experiences_from_text(text: str) -> ExperienceExtractionResponse:
+    """Extract STAR-structured experiences from cover-letter text with an LLM.
+
+    Args:
+        text: Body text (e.g. a cover letter) to analyse.
+
+    Returns:
+        The chain's structured output, requested in the shape of
+        ``ExperienceExtractionResponse``.
+
+    Raises:
+        ValueError: If invoking the chain fails for any reason; the original
+            exception is attached as ``__cause__``.
+    """
+    # Single source of truth, so the model recorded in the run metadata
+    # cannot drift from the model that is actually called.
+    model_name = "gpt-4o"
+    llm = ChatOpenAI(model=model_name, temperature=0)
+
+    # The system prompt asks the model to: split distinct experiences, fill
+    # the STAR(+L) fields, pick 2-4 competency tags, recommend applicable
+    # question types, quote the source sentences and default the status.
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", (
+            "당신은 사용자의 자기소개서(자소서) 원문에서 경험(Experience)을 추출하는 전문가입니다.\n"
+            "주어진 자소서 내용에서 하나 또는 여러 개의 독립된 경험을 추출하여 구조화된 데이터로 반환하세요.\n\n"
+            "### 추출 가이드라인:\n"
+            "1. **경험 분리**: 자소서 하나에 여러 경험(예: 인턴, 해커톤 등)이 섞여 있다면 각각을 분리하여 추출하세요.\n"
+            "2. **STAR 구조화**: 각 경험에 대해 다음 항목들을 명확히 분류하세요.\n"
+            "   - 경험명 (experience_name)\n"
+            "   - 경험 유형 (experience_type)\n"
+            "   - 기관/소속 (organization)\n"
+            "   - 기간 (period)\n"
+            "   - 나의 역할 (my_role)\n"
+            "   - [S] 문제상황 (situation)\n"
+            "   - [A] 주요 행동 (action)\n"
+            "   - [R] 결과/성과 (result)\n"
+            "   - [L] 배운 점 (learnings)\n"
+            # NOTE(review): "소익성" in the next line looks like a typo
+            # (possibly "공익성" or "수익성") - confirm the intended tag.
+            "3. **역량 태그**: 해당 경험을 통해 어필할 수 있는 핵심 역량(예: 문제해결, 사용자 이해, 기획력, 소익성 등)을 2~4개 추출하여 core_competencies 필드에 저장하세요.\n"
+            "4. **활용 가능 문항**: 이 경험이 어떤 면접/자소서 문항(예: 갈등 극복, 도전, 직무 역량, 공익 기여 등)에 적합한지 추천하여 applicable_questions 필드에 저장하세요.\n"
+            "5. **원문 출처**: 해당 경험을 추출한 원문의 실제 문장들을 source_text 필드에 기록하세요.\n"
+            "6. **상태**: 상태(status)는 기본적으로 '미확인'으로 지정하세요.\n"
+        )),
+        ("user", "다음은 자기소개서 내용입니다. 위 가이드라인에 따라 경험을 추출해주세요:\n\n{text}")
+    ])
+
+    chain = prompt | llm.with_structured_output(ExperienceExtractionResponse)
+
+    # Run name, tags and metadata only label the run; they are passed through
+    # the invoke config and are not part of the prompt.
+    run_config = {
+        "run_name": "experience-extraction",
+        "tags": ["experience-extraction", "cover-letter", "ncs-public"],
+        "metadata": {
+            "model": model_name,
+            "extraction_format": "STAR+L",
+            "project": "pickd"
+        }
+    }
+
+    try:
+        return chain.invoke({"text": text}, config=run_config)
+    except Exception as e:
+        # Callers only expect ValueError; chain the cause explicitly so the
+        # provider error stays visible in tracebacks.
+        raise ValueError(f"경험 추출 중 오류가 발생했습니다: {e}") from e
+
+
+def extract_experiences_from_url(url: str) -> ExperienceExtractionResponse:
+    """Fetch a web page, reduce it to its text and extract experiences.
+
+    Args:
+        url: Address of the page to download.
+
+    Returns:
+        The result of ``extract_experiences_from_text`` on the page text.
+
+    Raises:
+        ValueError: If the page cannot be fetched or parsed, if it contains
+            no text after cleanup, or if the downstream extraction fails.
+    """
+    # NOTE(review): `url` appears to arrive straight from the request form and
+    # no host restriction is applied here (possible SSRF) - consider an
+    # allow-list or blocking private address ranges.
+    try:
+        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
+        response.raise_for_status()
+
+        # Without a charset in the Content-Type header, requests decodes
+        # text/* bodies as ISO-8859-1, which garbles Korean pages; fall back
+        # to the encoding detected from the body instead.
+        if "charset" not in response.headers.get("Content-Type", "").lower():
+            response.encoding = response.apparent_encoding
+
+        soup = BeautifulSoup(response.text, "html.parser")
+        # Script and style bodies are not page text.
+        for tag in soup(["script", "style"]):
+            tag.decompose()
+
+        raw_text = soup.get_text(separator="\n")
+    except Exception as e:
+        # Only fetch/parse failures are reported as URL errors; the checks
+        # below raise their own, unwrapped messages.
+        raise ValueError(f"URL 분석 중 오류가 발생했습니다: {e}") from e
+
+    # Trim every line, split on double spaces and drop empty fragments.
+    stripped_lines = (line.strip() for line in raw_text.splitlines())
+    chunks = (phrase.strip() for line in stripped_lines for phrase in line.split("  "))
+    full_text = "\n".join(chunk for chunk in chunks if chunk)
+
+    if not full_text.strip():
+        raise ValueError("URL에서 유의미한 텍스트를 추출하지 못했습니다.")
+
+    return extract_experiences_from_text(full_text)
+
+
+def extract_experiences_from_pdf(file_content: bytes) -> ExperienceExtractionResponse:
+    """Read the text of an in-memory PDF with PyMuPDF and extract experiences.
+
+    Args:
+        file_content: Raw bytes of the PDF document.
+
+    Returns:
+        The result of ``extract_experiences_from_text`` on the page texts
+        joined with newlines.
+
+    Raises:
+        ValueError: If the PDF cannot be opened or read, if it yields no
+            text, or if the downstream extraction fails.
+    """
+    try:
+        # The context manager closes the document even when reading a page
+        # fails, so the handle is not leaked.
+        with fitz.open(stream=file_content, filetype="pdf") as doc:
+            full_text = "\n".join(page.get_text() for page in doc)
+    except Exception as e:
+        # Only open/read failures are reported as PDF errors; the checks
+        # below raise their own, unwrapped messages.
+        raise ValueError(f"PDF 분석 중 오류가 발생했습니다: {e}") from e
+
+    if not full_text.strip():
+        raise ValueError("PDF에서 유의미한 텍스트를 추출하지 못했습니다.")
+
+    return extract_experiences_from_text(full_text)
+
diff --git a/myeongsung/app/services/job_analysis_service.py b/myeongsung/app/services/job_analysis_service.py
@@ -71,7 +71,7 @@ def _analyze_with_vision(image_url: str, google_api_key: str) -> Optional[JobPos
         image = PIL.Image.open(io.BytesIO(img_response.content))
 
         response = client.models.generate_content(
-            model='gemini-2.0-flash',
+            model='gemini-2.5-flash',
             contents=[image, _VISION_SYSTEM_PROMPT],
             config=types.GenerateContentConfig(
                 response_mime_type='application/json',
@@ -127,74 +127,52 @@ def _analyze_with_text(markdown: str) -> Optional[JobPostingCreate]:
 # 4. 지능적 병합 엔진
 # ──────────────────────────────────────────────
 
-def _smart_merge(text_result: Optional[JobPostingCreate],
+def _smart_merge(text_result: Optional[JobPostingCreate], 
                  vision_result: Optional[JobPostingCreate]) -> JobPostingCreate:
     """
     텍스트 엔진과 비전 엔진의 결과를 지능적으로 병합.
     원칙: 텍스트 결과를 기본으로 하되, 비전 결과가 더 풍부한 필드는 비전 결과를 채택.
     """
-    # 둘 다 없으면 에러
-    if not text_result and not vision_result:
-        raise ValueError("텍스트 분석과 비전 분석 모두 실패했습니다.")
-
-    # 하나만 있으면 그것을 사용
-    if not text_result:
-        return vision_result
-    if not vision_result:
-        return text_result
-
-    merged = text_result.model_copy(deep=True)
-
-    # sections: 비전이 더 많은 부문을 발견했으면 비전 결과 채택
-    if len(vision_result.sections) > len(merged.sections):
-        merged.sections = vision_result.sections
-
-    # processes: 비어있으면 비전에서 가져옴
-    if not merged.processes and vision_result.processes:
-        merged.processes = vision_result.processes
-
-    # documents
-    if not merged.documents and vision_result.documents:
-        merged.documents = vision_result.documents
-
-    # company_info: 비전이 더 풍부하면 채택
-    if vision_result.company_info:
-        if not merged.company_info:
-            merged.company_info = vision_result.company_info
-        else:
-            # 개별 필드 단위로 보완
-            for field_name in vision_result.company_info.model_fields:
-                vision_val = getattr(vision_result.company_info, field_name, None)
-                merged_val = getattr(merged.company_info, field_name, None)
-                if vision_val and not merged_val:
-                    setattr(merged.company_info, field_name, vision_val)
-
-    # guideline
-    if vision_result.guideline:
-        if not merged.guideline:
-            merged.guideline = vision_result.guideline
-        else:
-            for field_name in vision_result.guideline.model_fields:
-                vision_val = getattr(vision_result.guideline, field_name, None)
-                merged_val = getattr(merged.guideline, field_name, None)
-                if vision_val and not merged_val:
-                    setattr(merged.guideline, field_name, vision_val)
-
-    # 단순 필드 보완 (비어있으면 비전에서 가져옴)
-    for field_name in ["employment_type", "headcount", "region_1depth", "workplace_address", "notice_url"]:
-        merged_val = getattr(merged, field_name, None)
-        vision_val = getattr(vision_result, field_name, None)
-        if not merged_val and vision_val:
-            setattr(merged, field_name, vision_val)
-
-    # citations 합치기 (중복 제거)
-    existing_contents = {c.content[:50] for c in merged.citations}
-    for cit in vision_result.citations:
-        if cit.content[:50] not in existing_contents:
-            merged.citations.append(cit)
-
-    return merged
-
+    # The guard clauses stay outside the try block: the "both engines failed"
+    # ValueError must reach the caller. Inside the try it would be caught by
+    # the fallback handler below, which would then return None.
+    if not text_result and not vision_result:
+        raise ValueError("텍스트 분석과 비전 분석 모두 실패했습니다.")
+
+    # Only one engine produced a result: nothing to merge.
+    if not text_result:
+        return vision_result
+    if not vision_result:
+        return text_result
+
+    # NOTE(review): unlike the lines this patch removes, nested company_info
+    # and guideline values are no longer backfilled from the vision result -
+    # confirm that this is intended.
+    try:
+        merged = text_result.model_copy(deep=True)
+
+        # 1. sections: take the vision list when it found more sections.
+        if vision_result.sections and len(vision_result.sections) > len(merged.sections):
+            merged.sections = vision_result.sections
+
+        # 2. processes / documents: backfill only when the text result has none.
+        if not merged.processes and vision_result.processes:
+            merged.processes = vision_result.processes
+        if not merged.documents and vision_result.documents:
+            merged.documents = vision_result.documents
+
+        # 3. Top-level scalar fields: backfill when the text value is missing
+        #    (None, empty string or 0) and the vision value is truthy.
+        for field_name in ["company_name", "notice_name", "employment_type", "headcount", "region_1depth", "workplace_address", "notice_url"]:
+            merged_val = getattr(merged, field_name, None)
+            vision_val = getattr(vision_result, field_name, None)
+            if (merged_val is None or merged_val == "" or merged_val == 0) and vision_val:
+                setattr(merged, field_name, vision_val)
+
+        # 4. citations: append vision citations whose first 50 characters are
+        #    not already present; citations without content are skipped.
+        existing_contents = {c.content[:50] for c in merged.citations if c.content}
+        for cit in vision_result.citations:
+            if cit.content and cit.content[:50] not in existing_contents:
+                merged.citations.append(cit)
+
+        return merged
+    except Exception as e:
+        # Best effort: a failed merge must not discard a usable analysis.
+        # Both results are non-empty here, so fall back to the text result.
+        print(f"[!] 결과 병합 중 오류 발생: {e}")
+        return text_result
 
 # ──────────────────────────────────────────────
 # 5. 메인 엔트리포인트
@@ -231,7 +209,7 @@ def analyze_job_url(url: str) -> JobPostingCreate:
     # 3. 비전 엔진 실행 (항상 실행하여 보완)
     vision_result = None
     if screenshot_url and google_api_key:
-        print("[*] 비전 엔진(Gemini 2.0 Flash) 분석 중...")
+        print("[*] 비전 엔진(Gemini 2.5 Flash) 분석 중...")
         vision_result = _analyze_with_vision(screenshot_url, google_api_key)
 
     # 4. 지능적 병합