import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from datetime import datetime, timedelta
from urllib.parse import quote_plus
import yfinance as yf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import io
import base64
import html

# -----------------------------
# LOAD LOCAL TICKERS
# -----------------------------
tickers = pd.read_csv("tickers.csv")

def get_company_name(symbol: str):
    """Resolve a raw symbol to (is_valid, symbol, company_name).

    Checks the local CSV first, then falls back to a yfinance lookup
    with the NSE (".NS") suffix.
    """
    symbol = symbol.upper().strip()
    row = tickers[tickers["Symbol"] == symbol]
    if not row.empty:
        return True, row.iloc[0]["Symbol"], row.iloc[0]["Company"]
    try:
        ticker = yf.Ticker(symbol + ".NS")
        info = ticker.info
        if "longName" in info:
            return True, symbol, info["longName"]
    except Exception:
        pass
    return False, None, None

# -----------------------------
# FETCH NEWS
# -----------------------------
def fetch_news(query, max_items=50):
    # quote_plus keeps multi-word company names from breaking the URL
    url = f"https://news.google.com/rss/search?q={quote_plus(query)}+stock&hl=en-IN&gl=IN&ceid=IN:en"
    try:
        r = requests.get(url, timeout=5)
        r.raise_for_status()
    except Exception:
        return [], 0
    soup = BeautifulSoup(r.text, "lxml-xml")
    items = soup.find_all("item")
    news_list = []
    for item in items[:max_items]:
        news_list.append({
            "title": item.title.text,
            "link": item.link.text,
            "published": item.pubDate.text,
        })
    return news_list, len(items)

# -----------------------------
# DATE PARSER
# -----------------------------
def parse_date(date_str):
    if not date_str:
        return None
    fmts = ["%a, %d %b %Y %H:%M:%S %Z", "%a, %d %b %Y %H:%M:%S %z"]
    for fmt in fmts:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            pass
    return None

# -----------------------------
# FILTER NEWS BY PERIOD
# -----------------------------
def filter_news_by_period(news_list, period_days=7):
    cutoff = datetime.utcnow() - timedelta(days=period_days)
    filtered = []
    for item in news_list:
        dt = parse_date(item.get("published", ""))
        if dt and dt.replace(tzinfo=None) >= cutoff:
            filtered.append(item)
    return filtered

# -----------------------------
# LOAD FINBERT
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
labels = [model.config.id2label[i] for i in range(model.config.num_labels)]

def analyze_sentiment_batch(texts):
    batch = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        output = model(**batch)
    probs = torch.nn.functional.softmax(output.logits, dim=1)
    results = []
    for p in probs:
        results.append({
            "positive": float(p[labels.index("positive")]),
            "neutral": float(p[labels.index("neutral")]),
            "negative": float(p[labels.index("negative")]),
        })
    return results

# -----------------------------
# MAIN PIPELINE
# -----------------------------
def run_pipeline(user_input, period_option, max_news):
    # Extract symbol if user selected "SYMBOL - Company"
    if " - " in user_input:
        raw_symbol = user_input.split(" - ")[0].strip()
    else:
        raw_symbol = user_input.strip()

    period_map = {"Last 7 days": 7, "Last 10 days": 10, "Last 1 month": 30}
    period_days = period_map.get(period_option, 7)

    valid, symbol, company = get_company_name(raw_symbol)
    if not valid:
        return f"❌ '{user_input}' is not a valid NSE stock symbol.", "", "", "", "", "", ""

    # Fetch news (fetch_news already appends "+stock" to the query)
    news, total_items = fetch_news(company, max_items=int(max_news))
    news = filter_news_by_period(news, period_days=period_days)
    fetched_count = min(total_items, int(max_news))

    if len(news) == 0:
        return f"No news found for {company} in {period_option}.", "", "", "", "", "", ""
    info_msg = f"**Found {len(news)} headlines from the last {period_days} days (fetched {fetched_count} / requested {max_news}).**"

    # Sentiment
    texts = [n["title"] for n in news]
    sentiments = analyze_sentiment_batch(texts)

    results = []
    counts = {"positive": 0, "neutral": 0, "negative": 0}
    overall_sums = {"positive": 0.0, "neutral": 0.0, "negative": 0.0}
    weighted_counts = {"positive": 0.0, "neutral": 0.0, "negative": 0.0}
    date_sentiments = {}
    date_counts = {}

    for item, sent in zip(news, sentiments):
        pos, neu, neg = sent["positive"], sent["neutral"], sent["negative"]
        overall = round(pos - neg, 3)
        pred = max(["positive", "neutral", "negative"], key=lambda k: sent[k])
        counts[pred] += 1
        overall_sums[pred] += overall
        for k in ["positive", "neutral", "negative"]:
            weighted_counts[k] += sent[k]
        dt = parse_date(item["published"])
        if dt:
            dkey = dt.date().isoformat()
            date_sentiments.setdefault(dkey, []).append(overall)
            date_counts.setdefault(dkey, []).append(pred)
        results.append({
            "headline": item["title"],
            "positive": round(pos, 3),
            "neutral": round(neu, 3),
            "negative": round(neg, 3),
            "overall": overall,
            "published": item["published"],
            "link": item["link"],
        })

    # Newest first; strip tzinfo so aware and naive dates compare safely
    results.sort(key=lambda x: parse_date(x["published"]).replace(tzinfo=None), reverse=True)

    # -----------------------------
    # SUMMARY
    # -----------------------------
    summary = f"""
### 📊 Sentiment Summary for {company} ({symbol}) – {period_option}

| Sentiment | Count | Weighted Count |
|----------|-------|-----------|
| 🟢 Positive | {counts['positive']} | {weighted_counts['positive']:.2f} |
| ⚪ Neutral | {counts['neutral']} | {weighted_counts['neutral']:.2f} |
| 🔴 Negative | {counts['negative']} | {weighted_counts['negative']:.2f} |
| Total | {len(news)} | {len(news)} |
"""

    # -----------------------------
    # HEADLINES TABLE
    # -----------------------------
    # Built as an HTML table so the color styling and links render
    # correctly in the gr.HTML output component.
    table = """
<table border="1" style="border-collapse: collapse;">
<tr>
  <th>Published</th><th>Headline</th><th>Positive</th><th>Neutral</th>
  <th>Negative</th><th>Sentiment Score (POS-NEG)</th><th>Link</th>
</tr>
"""

    def cell_html(value, label):
        # Bold probabilities of 0.5 or more so the dominant class stands out
        if value >= 0.5:
            return f"<td><b>{value}</b></td>"
        else:
            return f"<td>{value}</td>"

    for r in results:
        color = "green" if r["overall"] > 0 else "red" if r["overall"] < 0 else "black"
        table += "<tr>"
        table += f"<td>{r['published']}</td>"
        table += f"<td>{html.escape(r['headline'])}</td>"
        table += cell_html(r['positive'], "positive")
        table += cell_html(r['neutral'], "neutral")
        table += cell_html(r['negative'], "negative")
        table += f"<td style='color:{color}'>{r['overall']}</td>"
        table += f"<td><a href='{r['link']}' target='_blank'>Open</a></td>"
        table += "</tr>"
    table += "</table>"
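    # -----------------------------
    # CHARTS + METRICS (sketch)
    # -----------------------------
    # The chart-building step is only sketched here: it assumes the usual
    # matplotlib -> PNG -> base64 -> <img> pattern implied by the io/base64
    # imports and the gr.HTML chart outputs below. The helper name
    # fig_to_html, the figure sizes, and the metrics wording are assumptions.
    def fig_to_html(fig):
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
        plt.close(fig)
        encoded = base64.b64encode(buf.getvalue()).decode()
        return f'<img src="data:image/png;base64,{encoded}" style="max-width:100%;"/>'

    days = sorted(date_sentiments.keys())
    day_objs = [datetime.fromisoformat(d) for d in days]
    avg_daily = [sum(date_sentiments[d]) / len(date_sentiments[d]) for d in days]

    # Chart 1: daily headline counts
    fig1, ax1 = plt.subplots(figsize=(8, 3))
    ax1.bar(day_objs, [len(date_counts[d]) for d in days])
    ax1.set_title("Daily Headline Counts")
    ax1.xaxis.set_major_formatter(mdates.DateFormatter("%d %b"))
    chart1 = fig_to_html(fig1)

    # Chart 2: daily average sentiment (positive - negative)
    fig2, ax2 = plt.subplots(figsize=(8, 3))
    ax2.plot(day_objs, avg_daily, marker="o")
    ax2.axhline(0, linewidth=0.8)
    ax2.set_title("Daily Sentiment Trend")
    ax2.xaxis.set_major_formatter(mdates.DateFormatter("%d %b"))
    chart2 = fig_to_html(fig2)

    # Chart 3: closing price overlaid with the sentiment trend
    try:
        start = (datetime.utcnow() - timedelta(days=period_days)).date()
        hist = yf.Ticker(symbol + ".NS").history(start=str(start))
        fig3, ax3 = plt.subplots(figsize=(8, 3))
        ax3.plot(hist.index, hist["Close"], label="Close")
        ax3b = ax3.twinx()
        ax3b.plot(day_objs, avg_daily, marker="o", color="tab:orange", label="Sentiment")
        ax3.set_title("Daily Stock Price + Sentiment Trend")
        ax3.legend(loc="upper left")
        ax3b.legend(loc="upper right")
        chart3 = fig_to_html(fig3)
    except Exception:
        chart3 = "Price data unavailable."

    # Key metrics summary (markdown, rendered by gr.Markdown)
    avg_overall = sum(r["overall"] for r in results) / len(results)
    metrics_md = f"""
### Key Metrics Summary
- Average sentiment score (POS-NEG): **{avg_overall:.3f}**
- Headlines analysed: **{len(results)}**
- Most active day: **{max(days, key=lambda d: len(date_counts[d]))}**
"""

    return summary, info_msg, chart1, chart2, table, chart3, metrics_md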
# -----------------------------
# GRADIO UI
# -----------------------------
with gr.Blocks() as ui:
    gr.HTML("Enter an NSE/BSE stock symbol. The app uses FinBERT + Google News to generate sentiment analysis of recent headlines.
") with gr.Row(): with gr.Column(scale=1): symbol_in = gr.Textbox(label="Enter Stock Symbol (e.g., RELIANCE, TCS)") with gr.Column(scale=1): period_in = gr.Dropdown( ["Last 7 days", "Last 10 days", "Last 1 month"], value="Last 7 days", label="Select Period" ) with gr.Column(scale=1): max_news_in = gr.Slider( minimum=20, maximum=100, step=1, value=50, label="Number of Headlines to Fetch" ) btn = gr.Button("Analyze") with gr.Row(): info_out = gr.Markdown() with gr.Row(): summary_out = gr.Markdown() chart1_out = gr.HTML(label="Daily Headline Counts") with gr.Row(): chart2_out = gr.HTML(label="Daily Sentiment Trend") chart3_out = gr.HTML(label="Daily Stock Price + Sentiment Trend") with gr.Row(): table_out = gr.HTML(label="Headlines Table") # Add metrics summary in a separate row to make it clearly visible with gr.Row(): metrics_summary_out = gr.Markdown(label="Key Metrics Summary") btn.click( run_pipeline, inputs=[symbol_in, period_in, max_news_in], outputs=[summary_out, info_out, chart1_out, chart2_out, table_out, chart3_out, metrics_summary_out] ) ui.launch()