Skip to content

Commit 36f5f0e

Browse files
Create app.py
I created this as part one of a company assessment (the first round of their hiring process): finding the best YouTube videos using the YouTube API and the Gemini LLM.
1 parent 0cd6634 commit 36f5f0e

File tree

1 file changed

+180
-0
lines changed
  • Youtube_video_frinder_using_GeminiLLM

1 file changed

+180
-0
lines changed
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
import datetime
import os
import re

import google.generativeai as genai
from googleapiclient.discovery import build

# ——— CONFIG ———
# Both clients are created at import time. The keys are read with
# os.environ[...], so a missing variable raises KeyError right here.

# YouTube Data API v3 client, authenticated with a developer key.
yt = build(
    serviceName="youtube",
    version="v3",
    developerKey=os.environ["YT_API_KEY"],
)

# Register the Gemini API key with the google.generativeai package.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Gemini model handle shared by the rest of the module.
model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")


# ISO 8601 duration as found in contentDetails.duration,
# e.g. "PT5M30S", "PT1H2M", "P1DT3S", "P0D".
_ISO8601_DURATION = re.compile(
    r"^P(?:(?P<weeks>\d+)W)?(?:(?P<days>\d+)D)?"
    r"(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+)S)?)?$"
)


def _parse_iso8601_duration(duration):
    """Return the length of an ISO 8601 duration in whole seconds, or None if unparseable."""
    if not isinstance(duration, str):
        return None
    match = _ISO8601_DURATION.match(duration)
    if match is None:
        return None
    fields = {name: int(value or 0) for name, value in match.groupdict().items()}
    return (
        ((fields["weeks"] * 7 + fields["days"]) * 24 + fields["hours"]) * 3600
        + fields["minutes"] * 60
        + fields["seconds"]
    )


def search_videos(query, max_filtered_results=20):
    """
    Search for YouTube videos matching a query, filtering by recency and duration.

    Pages through the search results (at most 5 pages of 50) for videos
    published in the last 14 days, looks up each page's video details, and
    keeps the videos whose duration is between 4 and 20 minutes inclusive.
    Stops as soon as enough videos are collected, a page comes back empty,
    or there is no further page.

    Args:
        query: Search query string.
        max_filtered_results: Number of qualifying videos to collect before
            stopping. A value <= 0 returns an empty list without any API call.

    Returns:
        A list of dicts with the keys "id", "title", "duration" (seconds)
        and "publishedAt", in the order the API returned them.
    """
    # Timezone-aware "now" formatted as an RFC 3339 UTC timestamp without
    # fractional seconds.
    published_after = (
        datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=14)
    ).strftime("%Y-%m-%dT%H:%M:%SZ")

    min_seconds = 4 * 60
    max_seconds = 20 * 60
    max_pages = 5  # Limit the number of pages to search to avoid excessive API calls

    filtered_videos = []
    next_page_token = None
    page_count = 0

    while len(filtered_videos) < max_filtered_results and page_count < max_pages:
        # Step 1: Search for videos matching the query
        search_response = yt.search().list(
            q=query,
            part="id,snippet",
            type="video",
            order="relevance",
            publishedAfter=published_after,
            maxResults=50,  # Maximum allowed by the API
            pageToken=next_page_token,
        ).execute()
        page_count += 1

        # Step 2: Collect video IDs from this page, ignoring any malformed item
        video_ids = [
            item["id"]["videoId"]
            for item in search_response.get("items", [])
            if "videoId" in item.get("id", {})
        ]
        if not video_ids:
            break

        # Step 3: Get details for the fetched videos
        details = yt.videos().list(
            part="contentDetails,snippet",
            id=",".join(video_ids),
        ).execute()

        # Step 4: Filter by duration (4–20 minutes inclusive)
        for item in details.get("items", []):
            video_id = item.get("id", "N/A")
            duration_text = item.get("contentDetails", {}).get("duration")
            total_seconds = _parse_iso8601_duration(duration_text)
            if total_seconds is None:
                print(f"Could not parse duration for video {video_id}: {duration_text!r}")
                continue
            if not min_seconds <= total_seconds <= max_seconds:
                continue

            try:
                video = {
                    "id": item["id"],
                    "title": item["snippet"]["title"],
                    "duration": total_seconds,
                    "publishedAt": item["snippet"]["publishedAt"],
                }
            except KeyError as missing:
                print(f"Skipping video {video_id}: missing field {missing}")
                continue

            filtered_videos.append(video)
            if len(filtered_videos) >= max_filtered_results:
                break

        # Check if there are more pages of results
        next_page_token = search_response.get("nextPageToken")
        if not next_page_token:
            break

        # Only announce another page when one will actually be fetched.
        if len(filtered_videos) < max_filtered_results and page_count < max_pages:
            print(f"Found {len(filtered_videos)} qualifying videos so far. Searching next page...")

    print(f"Search completed. Found {len(filtered_videos)} videos meeting criteria.")
    return filtered_videos


# First decimal number appearing in the model's reply (e.g. "8", "7.5", "8/10").
_SCORE_PATTERN = re.compile(r"\d+(?:\.\d+)?")


def score_title(title, query):
    """
    Score a video title's relevance to the query using Gemini AI.

    Args:
        title: Video title to rate.
        query: The user's search query.

    Returns:
        A float clamped to the range 1.0–10.0. Falls back to the neutral
        score 5.0 when the model call fails or the reply contains no number.
    """
    default_score = 5.0  # Default middle score instead of 0
    prompt = (
        f"Query: {query}\n"
        f"Title: {title}\n"
        "Rate relevance & quality 1–10 (just give the number)."
    )

    # Deliberate best-effort: any failure of the model call (including
    # response.text raising when the reply has no usable text) must not abort
    # scoring of the remaining videos.
    try:
        response = model.generate_content(prompt)
        score_text = response.text.strip()
    except Exception as e:
        print(f"Error scoring title '{title}': {e}")
        return default_score

    match = _SCORE_PATTERN.search(score_text)
    if match is None:
        print(f"Model returned non-numeric score for '{title}': '{score_text}'")
        return default_score

    # Keep the result inside the 1–10 scale the prompt asks for.
    return min(10.0, max(1.0, float(match.group(0))))


def pick_best(query, num_results=20):
    """
    Find and score the best YouTube videos for a query.

    Searches for candidate videos, scores every title with score_title,
    sorts by score (highest first) and prints the top entries.

    Args:
        query: Search query string
        num_results: Number of top videos to print. Values <= 0 print a
            notice and return without searching.

    Returns:
        None. All output goes to stdout.
    """
    if num_results <= 0:
        print("Nothing to show: num_results must be a positive number.")
        return

    # Get more videos than we need to ensure we have enough after scoring:
    # 1.5x the request rounded up, but never fewer than 30. Integer arithmetic
    # keeps the count an int.
    candidate_target = max(30, (3 * num_results + 1) // 2)
    vids = search_videos(query, max_filtered_results=candidate_target)

    if not vids:
        print("No suitable videos found after applying filters.")
        return

    # Score each video
    print(f"Scoring {len(vids)} videos...")
    for i, v in enumerate(vids):
        v["score"] = score_title(v["title"], query)
        print(f" Scored video {i+1}/{len(vids)}: '{v['title']}' - Score: {v['score']:.2f}")

    # Sort by score in descending order
    vids.sort(key=lambda x: x.get("score", 0.0), reverse=True)

    # Print the top num_results
    result_count = min(num_results, len(vids))
    print(f"\n--- Top {result_count} Relevant Videos ---")

    for i, video in enumerate(vids[:result_count]):
        print(f"\n{i+1}.")
        print(f" • Title: {video.get('title', 'N/A')}")
        print(f" • URL: https://youtu.be/{video.get('id', 'N/A')}")
        print(f" • Score: {video.get('score', 0.0):.2f}")
        duration_sec = video.get('duration', 0)
        print(f" • Duration: {duration_sec // 60}m{duration_sec % 60:02d}s")
        print(f" • Published: {video.get('publishedAt', 'N/A')}")


# —— RUN IT! ——
if __name__ == "__main__":
    # NOTE(review): the module-level client setup reads these same variables
    # with os.environ[...], so a missing key raises KeyError at import time
    # before this check is reached.
    required_keys = ("YT_API_KEY", "GEMINI_API_KEY")
    if all(key in os.environ for key in required_keys):
        user_query = input("Enter your search (voice-to-text or text): ")
        # Rank and print the top 20 videos for the entered query.
        pick_best(user_query, num_results=20)
    else:
        print("Error: YouTube and/or Gemini API keys not set in environment variables.")

0 commit comments

Comments
 (0)