import gradio as gr import pandas as pd import requests from bs4 import BeautifulSoup from transformers import AutoTokenizer, AutoModelForSequenceClassification import torch from datetime import datetime, timedelta import yfinance as yf import matplotlib.pyplot as plt import matplotlib.dates as mdates import io import base64 import html # ----------------------------- # LOAD LOCAL TICKERS # ----------------------------- tickers = pd.read_csv("tickers.csv") def get_company_name(symbol: str): symbol = symbol.upper().strip() row = tickers[tickers["Symbol"] == symbol] if not row.empty: return True, row.iloc[0]["Symbol"], row.iloc[0]["Company"] try: ticker = yf.Ticker(symbol + ".NS") info = ticker.info if "longName" in info: return True, symbol, info["longName"] except Exception: pass return False, None, None # ----------------------------- # FETCH NEWS # ----------------------------- def fetch_news(query, max_items=50): url = f"https://news.google.com/rss/search?q={query}+stock&hl=en-IN&gl=IN&ceid=IN:en" try: r = requests.get(url, timeout=5) r.raise_for_status() except Exception: return [], 0 soup = BeautifulSoup(r.text, "lxml-xml") items = soup.find_all("item") news_list = [] for item in items[:max_items]: news_list.append({ "title": item.title.text, "link": item.link.text, "published": item.pubDate.text, }) return news_list, len(items) # ----------------------------- # DATE PARSER # ----------------------------- def parse_date(date_str): if not date_str: return None fmts = ["%a, %d %b %Y %H:%M:%S %Z", "%a, %d %b %Y %H:%M:%S %z"] for fmt in fmts: try: return datetime.strptime(date_str, fmt) except: pass return None # ----------------------------- # FILTER NEWS BY PERIOD # ----------------------------- def filter_news_by_period(news_list, period_days=7): cutoff = datetime.utcnow() - timedelta(days=period_days) filtered = [] for item in news_list: dt = parse_date(item.get("published", "")) if dt and dt.replace(tzinfo=None) >= cutoff: filtered.append(item) return filtered # ----------------------------- # LOAD FINBERT # ----------------------------- tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert") model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert") labels = [model.config.id2label[i] for i in range(model.config.num_labels)] def analyze_sentiment_batch(texts): batch = tokenizer(texts, return_tensors="pt", truncation=True, padding=True) with torch.no_grad(): output = model(**batch) probs = torch.nn.functional.softmax(output.logits, dim=1) results = [] for p in probs: results.append({ "positive": float(p[labels.index("positive")]), "neutral": float(p[labels.index("neutral")]), "negative": float(p[labels.index("negative")]), }) return results # ----------------------------- # MAIN PIPELINE # ----------------------------- def run_pipeline(user_input, period_option, max_news): # Extract symbol if user selected "SYMBOL - Company" if " - " in user_input: raw_symbol = user_input.split(" - ")[0].strip() else: raw_symbol = user_input.strip() period_map = {"Last 7 days": 7, "Last 10 days": 10, "Last 1 month": 30} period_days = period_map.get(period_option, 7) valid, symbol, company = get_company_name(raw_symbol) if not valid: return f"โŒ '{user_input}' is not a valid NSE stock symbol.", "", "", "", "", "", "" # Fetch news query = company + " stock" news, total_items = fetch_news(query, max_items=int(max_news)) news = filter_news_by_period(news, period_days=period_days) fetched_count = min(total_items, int(max_news)) if len(news) == 0: return f"No news found for {company} in {period_option}", "", "", "", "", "", "" info_msg = f"**Found {len(news)} headlines from the last {period_days} days (fetched {fetched_count} / requested {max_news}).**" # Sentiment texts = [n["title"] for n in news] sentiments = analyze_sentiment_batch(texts) results = [] counts = {"positive": 0, "neutral": 0, "negative": 0} overall_sums = {"positive": 0.0, "neutral": 0.0, "negative": 0.0} weighted_counts = {"positive": 0.0, "neutral": 0.0, "negative": 0.0} date_sentiments = {} date_counts = {} for item, sent in zip(news, sentiments): pos, neu, neg = sent["positive"], sent["neutral"], sent["negative"] overall = round(pos - neg, 3) pred = max(["positive", "neutral", "negative"], key=lambda k: sent[k]) counts[pred] += 1 overall_sums[pred] += overall for k in ["positive", "neutral", "negative"]: weighted_counts[k] += sent[k] dt = parse_date(item["published"]) if dt: dkey = dt.date().isoformat() date_sentiments.setdefault(dkey, []).append(overall) date_counts.setdefault(dkey, []).append(pred) results.append({ "headline": item["title"], "positive": round(pos, 3), "neutral": round(neu, 3), "negative": round(neg, 3), "overall": overall, "published": item["published"], "link": item["link"] }) results.sort(key=lambda x: parse_date(x["published"]), reverse=True) # ----------------------------- # SUMMARY # ----------------------------- summary = f""" ### ๐Ÿ“Š Sentiment Summary for {company} ({symbol}) โ€” {period_option} | Sentiment | Count | Weighted Count | |----------|-------|-----------| | ๐Ÿ˜Š Positive | {counts['positive']} | {weighted_counts['positive']:.2f} | | ๐Ÿ˜ Neutral | {counts['neutral']} | {weighted_counts['neutral']:.2f} | | ๐Ÿ˜ž Negative | {counts['negative']} | {weighted_counts['negative']:.2f} | | Total | {len(news)} | {len(news)} | """ # ----------------------------- # HEADLINES TABLE # ----------------------------- table = """ """ for i, r in enumerate(results): row_color = "#ffffff" if i % 2 == 0 else "#f9f9f9" # Determine highest sentiment max_sent = max(["positive", "neutral", "negative"], key=lambda k: r[k]) # Colors color_map = {"positive": "green", "neutral": "gray", "negative": "red"} # Helper: build cell HTML def cell_html(value, sent_type): if max_sent == sent_type: return f"" else: return f"" color = "green" if r["overall"] > 0 else "red" if r["overall"] < 0 else "black" table += f"" table += f"" table += f"" table += cell_html(r['positive'], "positive") table += cell_html(r['neutral'], "neutral") table += cell_html(r['negative'], "negative") table += f"" table += f"" table += "" table += "
Published Headline Positive Neutral Negative Sentiment Score
(POS-NEG)
Link
{value}{value}
{r['published']}{html.escape(r['headline'])}{r['overall']}Open
" # ----------------------------- # CSV DOWNLOAD # ----------------------------- df = pd.DataFrame(results) csv_buf = io.StringIO() df.to_csv(csv_buf, index=False) csv_data = "data:text/csv;base64," + base64.b64encode(csv_buf.getvalue().encode()).decode() csv_link_html = f"โฌ‡๏ธ Download CSV" # ----------------------------- # CHARTS # ----------------------------- chart_counts_html = chart_sentiment_html = chart_price_sentiment_html = "" if date_sentiments: dates = sorted(date_sentiments.keys()) avg_sentiments = [sum(date_sentiments[d])/len(date_sentiments[d]) for d in dates] pos_counts = [date_counts[d].count("positive") for d in dates] neu_counts = [date_counts[d].count("neutral") for d in dates] neg_counts = [date_counts[d].count("negative") for d in dates] # Chart 1 fig1, ax1 = plt.subplots(figsize=(6, 3)) ax1.bar(dates, pos_counts, color="green") ax1.bar(dates, neu_counts, bottom=pos_counts, color="gray") ax1.bar(dates, neg_counts, bottom=[p + n for p, n in zip(pos_counts, neu_counts)], color="red") ax1.set_xlabel("") ax1.set_title("Daily Headline Counts") ax1.tick_params(axis='x', rotation=60, labelsize=7) plt.tight_layout() buf1 = io.BytesIO() fig1.savefig(buf1, format="png") buf1.seek(0) chart_counts_html = f"" plt.close(fig1) # Chart 2 fig2, ax2 = plt.subplots(figsize=(6, 3)) colors = ["green" if x > 0 else "red" if x < 0 else "gray" for x in avg_sentiments] ax2.bar(dates, avg_sentiments, color=colors) ax2.axhline(0, color="black", linestyle="--") ax2.set_title("Daily Sentiment Trend") ax2.tick_params(axis='x', rotation=60, labelsize=7) plt.tight_layout() buf2 = io.BytesIO() fig2.savefig(buf2, format="png") buf2.seek(0) chart_sentiment_html = f"" plt.close(fig2) # Chart 3 - Stock Price + Sentiment try: ticker_data = yf.Ticker(symbol + ".NS").history(period=f"{period_days}d") if not ticker_data.empty: fig3, ax3 = plt.subplots(figsize=(6, 3)) ax3.plot(ticker_data.index, ticker_data['Close'], color="blue", marker="o", label="Close Price") sentiment_dates = [pd.to_datetime(d) for d in dates] ax3_twin = ax3.twinx() ax3_twin.plot(sentiment_dates, avg_sentiments, color="orange", marker="o", label="Sentiment Score") lines1, labels1 = ax3.get_legend_handles_labels() lines2, labels2 = ax3_twin.get_legend_handles_labels() ax3.legend(lines1 + lines2, labels1 + labels2, loc="upper left") ax3.set_title("Daily Stock Price + Sentiment Trend") ax3.xaxis.set_major_formatter(mdates.DateFormatter("%d-%b")) fig3.autofmt_xdate() plt.tight_layout() buf3 = io.BytesIO() fig3.savefig(buf3, format="png") buf3.seek(0) chart_price_sentiment_html = f"" plt.close(fig3) except Exception: chart_price_sentiment_html = "" # ----------------------------- # KEY METRICS SUMMARY (Markdown) # ----------------------------- metrics_summary_text = """ ### ๐Ÿ“Œ Key Metrics Summary | Metric | Description | |--------|------------| | **Count** | Number of headlines predicted as Positive / Neutral / Negative. | | **Weighted Count** | Sum of the raw sentiment probabilities for each category across all headlines. Provides a โ€œconfidence-weightedโ€ measure of sentiment dominance. | | **Sentiment Score (per headline)** | `positive โˆ’ negative` probability. Shows whether the headline is more positive or negative. | | **Dominant Sentiment (per headline)** | The sentiment with the **highest probability** among positive, neutral, or negative. Highlighted in the table. | **Charts:** - **Daily Headline Counts**: Shows how many headlines per day fall into each sentiment. - **Daily Sentiment Trend**: Shows average overall sentiment per day (positive โˆ’ negative). - **Daily Stock Price + Sentiment Trend**: Plots stock closing price alongside daily sentiment for easy correlation. """ return ( summary, info_msg, chart_counts_html, chart_sentiment_html, table + "
" + csv_link_html, chart_price_sentiment_html, metrics_summary_text ) # ----------------------------- # GRADIO UI # ----------------------------- with gr.Blocks(title="Indian Stock Market Sentiment Analyzer") as ui: gr.Markdown("

๐Ÿ‡ฎ๐Ÿ‡ณ Indian Stock Market Sentiment Analyzer

") gr.Markdown("

Enter an NSE/BSE stock symbol. The app uses FinBERT + Google News to generate sentiment analysis of recent headlines.

") with gr.Row(): with gr.Column(scale=1): symbol_in = gr.Textbox(label="Enter Stock Symbol (e.g., RELIANCE, TCS)") with gr.Column(scale=1): period_in = gr.Dropdown( ["Last 7 days", "Last 10 days", "Last 1 month"], value="Last 7 days", label="Select Period" ) with gr.Column(scale=1): max_news_in = gr.Slider( minimum=20, maximum=100, step=1, value=50, label="Number of Headlines to Fetch" ) btn = gr.Button("Analyze") with gr.Row(): info_out = gr.Markdown() with gr.Row(): summary_out = gr.Markdown() chart1_out = gr.HTML(label="Daily Headline Counts") with gr.Row(): chart2_out = gr.HTML(label="Daily Sentiment Trend") chart3_out = gr.HTML(label="Daily Stock Price + Sentiment Trend") with gr.Row(): table_out = gr.HTML(label="Headlines Table") # Add metrics summary in a separate row to make it clearly visible with gr.Row(): metrics_summary_out = gr.Markdown(label="Key Metrics Summary") btn.click( run_pipeline, inputs=[symbol_in, period_in, max_news_in], outputs=[summary_out, info_out, chart1_out, chart2_out, table_out, chart3_out, metrics_summary_out] ) ui.launch()