saghircse committed on
Commit
bd01586
·
verified ·
1 Parent(s): d546325

Change layout and charts

Browse files
Files changed (1) hide show
  1. app.py +397 -398
app.py CHANGED
@@ -1,398 +1,397 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import requests
4
- from bs4 import BeautifulSoup
5
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
- import torch
7
- from datetime import datetime, timedelta
8
- import yfinance as yf
9
- import matplotlib.pyplot as plt
10
- import matplotlib.dates as mdates
11
- import io
12
- import base64
13
- import html
14
-
15
- # -----------------------------
16
- # LOAD LOCAL TICKERS
17
- # -----------------------------
18
- tickers = pd.read_csv("tickers.csv")
19
-
20
def get_company_name(symbol: str):
    """Resolve an NSE ticker symbol to its company name.

    The local ``tickers`` table is consulted first; if the symbol is not
    listed there, Yahoo Finance is queried for ``<SYMBOL>.NS``.

    Args:
        symbol: Ticker symbol as typed by the user (any case, may be padded
            with whitespace). ``None`` or an empty string is accepted.

    Returns:
        A ``(valid, symbol, company)`` tuple. On success ``valid`` is True and
        the other two items are the symbol and company name; otherwise the
        tuple is ``(False, None, None)``.
    """
    symbol = (symbol or "").upper().strip()
    if not symbol:
        # Nothing to look up; also avoids a pointless remote query for ".NS".
        return False, None, None

    row = tickers[tickers["Symbol"] == symbol]
    if not row.empty:
        return True, row.iloc[0]["Symbol"], row.iloc[0]["Company"]

    try:
        long_name = yf.Ticker(symbol + ".NS").info.get("longName")
    except Exception:
        # Deliberate best-effort fallback: any lookup failure is treated as
        # "unknown symbol" rather than surfaced to the UI.
        long_name = None

    # A present-but-empty name must not be reported as a valid company,
    # because callers concatenate it into the news search query.
    if long_name:
        return True, symbol, long_name
    return False, None, None
35
-
36
- # -----------------------------
37
- # FETCH NEWS
38
- # -----------------------------
39
def fetch_news(query, max_items=50):
    """Fetch headlines for *query* from the Google News RSS search feed.

    Args:
        query: Free-text search term (typically a company name). The keyword
            "stock" is appended unless the term already ends with it.
        max_items: Maximum number of feed entries to return.

    Returns:
        A ``(news_list, total_items)`` tuple. ``news_list`` holds dicts with
        ``title``, ``link`` and ``published`` keys for at most ``max_items``
        entries; ``total_items`` is the number of ``<item>`` entries in the
        feed. ``([], 0)`` is returned when the HTTP request fails.
    """
    from urllib.parse import quote_plus

    search_term = str(query).strip()
    # Callers may already have appended "stock"; do not repeat the keyword.
    if search_term.lower().split()[-1:] != ["stock"]:
        search_term = f"{search_term} stock".strip()

    # Encode the term so spaces, "&" and similar characters in company names
    # cannot truncate or corrupt the query string.
    url = (
        "https://news.google.com/rss/search"
        f"?q={quote_plus(search_term)}&hl=en-IN&gl=IN&ceid=IN:en"
    )
    try:
        r = requests.get(url, timeout=5)
        r.raise_for_status()
    except requests.RequestException:
        return [], 0

    soup = BeautifulSoup(r.text, "lxml-xml")
    items = soup.find_all("item")

    def text_of(tag):
        # A feed entry may lack a child element; treat that as empty text.
        return tag.text if tag is not None else ""

    news_list = [
        {
            "title": text_of(item.title),
            "link": text_of(item.link),
            "published": text_of(item.pubDate),
        }
        for item in items[:max_items]
    ]
    return news_list, len(items)
59
-
60
- # -----------------------------
61
- # DATE PARSER
62
- # -----------------------------
63
def parse_date(date_str):
    """Parse an RFC 2822 date string (the RSS ``pubDate`` format).

    Args:
        date_str: Text such as ``"Mon, 01 Jan 2024 10:00:00 GMT"`` or the same
            form with a numeric offset (``+0530``).

    Returns:
        A naive ``datetime`` expressed in UTC, or ``None`` when the value is
        empty, not a string, or cannot be parsed. Returning a single
        (naive, UTC) flavour keeps the results mutually comparable/sortable.
    """
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    if not date_str or not isinstance(date_str, str):
        return None
    try:
        parsed = parsedate_to_datetime(date_str)
    except (TypeError, ValueError, IndexError):
        # Older Python versions raise TypeError for unparsable input,
        # newer ones raise ValueError.
        return None

    if parsed.tzinfo is not None:
        # Convert to UTC *before* dropping the offset.
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed
73
-
74
- # -----------------------------
75
- # FILTER NEWS BY PERIOD
76
- # -----------------------------
77
def filter_news_by_period(news_list, period_days=7):
    """Keep only the news items published within the last *period_days* days.

    Args:
        news_list: Iterable of dicts; each item's ``published`` value is
            parsed with ``parse_date``. Items whose date is missing or
            unparsable are dropped.
        period_days: Size of the look-back window in days, counted back from
            the current UTC time.

    Returns:
        A new list with the items inside the window, in input order.
    """
    from datetime import timezone

    # Naive UTC "now" (datetime.utcnow() is deprecated since Python 3.12).
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff = now_utc - timedelta(days=period_days)

    filtered = []
    for item in news_list:
        dt = parse_date(item.get("published", ""))
        if dt is None:
            continue
        if dt.tzinfo is not None:
            # Convert to UTC before dropping the offset; merely stripping the
            # tzinfo would compare local wall-clock time against a UTC cutoff.
            dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
        if dt >= cutoff:
            filtered.append(item)
    return filtered
85
-
86
- # -----------------------------
87
- # LOAD FINBERT
88
- # -----------------------------
89
- tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
90
- model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
91
- labels = [model.config.id2label[i] for i in range(model.config.num_labels)]
92
-
93
def analyze_sentiment_batch(texts):
    """Score a batch of texts with the module-level FinBERT model.

    Args:
        texts: Sequence of strings (headlines) to classify.

    Returns:
        One dict per input text, in order, holding the ``positive``,
        ``neutral`` and ``negative`` softmax probabilities as floats.
        An empty input yields an empty list.
    """
    if len(texts) == 0:
        # Avoid invoking the tokenizer/model on an empty batch.
        return []

    batch = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        output = model(**batch)
        probs = torch.nn.functional.softmax(output.logits, dim=1)

    # Resolve each class's column once instead of once per row.
    column = {name: labels.index(name) for name in ("positive", "neutral", "negative")}
    return [
        {name: float(row[idx]) for name, idx in column.items()}
        for row in probs.tolist()
    ]
107
-
108
- # -----------------------------
109
- # MAIN PIPELINE
110
- # -----------------------------
111
_SENTIMENT_CLASSES = ("positive", "neutral", "negative")
_SENTIMENT_COLORS = {"positive": "green", "neutral": "gray", "negative": "red"}

# Static legend shown under the results; it does not depend on the query.
_METRICS_SUMMARY_TEXT = "\n".join([
    "",
    "### 📌 Key Metrics Summary",
    "",
    "| Metric | Description |",
    "|--------|------------|",
    "| **Count** | Number of headlines predicted as Positive / Neutral / Negative. |",
    "| **%** | Percentage of headlines in each sentiment category. |",
    "| **Avg Sentiment (Overall)** | Average **overall score** for headlines in this category. Calculated as **positive probability − negative probability** per headline, then averaged. |",
    "| **Weighted Count** | Sum of the raw sentiment probabilities for each category across all headlines. Provides a “confidence-weighted” measure of sentiment dominance. |",
    "| **Overall Score (per headline)** | `positive − negative` probability. Shows whether the headline is more positive or negative. |",
    "| **Dominant Sentiment (per headline)** | The sentiment with the **highest probability** among positive, neutral, or negative. Highlighted in the table. |",
    "",
    "**Charts:**",
    "- **Daily Headline Counts**: Shows how many headlines per day fall into each sentiment.",
    "- **Daily Sentiment Trend**: Shows average overall sentiment per day (positive − negative).",
    "- **Stock Price + Sentiment Trend**: Plots stock closing price alongside daily sentiment for easy correlation.",
    "",
])


def _empty_outputs(message):
    """Return the seven-slot output tuple with only the first slot filled."""
    return (message, "", "", "", "", "", "")


def _naive_utc(dt):
    """Return *dt* as a naive UTC datetime; ``None`` sorts as the oldest value."""
    from datetime import timezone

    if dt is None:
        return datetime.min
    if dt.tzinfo is not None:
        return dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt


def _figure_to_img_html(fig):
    """Render *fig* as an inline base64 PNG ``<img>`` tag and close the figure."""
    try:
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, even when rendering fails.
        plt.close(fig)
    encoded = base64.b64encode(buf.getvalue()).decode()
    return f"<img src='data:image/png;base64,{encoded}' style='width:100%; max-width:700px;'/>"


def _build_headlines_table(results):
    """Build the HTML headlines table, highlighting each row's dominant sentiment."""
    parts = [
        "<table style='width:100%; border-collapse: collapse;'>",
        "<tr style='background-color:#f2f2f2'>",
        "<th>Published</th>",
        "<th>Headline</th>",
        "<th>Positive</th>",
        "<th>Neutral</th>",
        "<th>Negative</th>",
        "<th>Overall (POS-NEG)</th>",
        "<th>Link</th>",
        "</tr>",
    ]
    for i, r in enumerate(results):
        row_color = "#ffffff" if i % 2 == 0 else "#f9f9f9"
        dominant = max(_SENTIMENT_CLASSES, key=lambda k: r[k])
        overall_color = "green" if r["overall"] > 0 else "red" if r["overall"] < 0 else "black"

        # Feed content is untrusted: escape text and only allow http(s) links.
        link = str(r["link"])
        if not link.startswith(("http://", "https://")):
            link = "#"

        parts.append(f"<tr style='background-color:{row_color};'>")
        parts.append(f"<td>{html.escape(str(r['published']))}</td>")
        parts.append(f"<td>{html.escape(r['headline'])}</td>")
        for kind in _SENTIMENT_CLASSES:
            if kind == dominant:
                parts.append(
                    f"<td style='color:{_SENTIMENT_COLORS[kind]}; font-weight:bold'>{r[kind]}</td>"
                )
            else:
                parts.append(f"<td>{r[kind]}</td>")
        parts.append(f"<td style='color:{overall_color}; font-weight:bold'>{r['overall']}</td>")
        parts.append(
            f"<td><a href='{html.escape(link, quote=True)}' target='_blank'>Open</a></td>"
        )
        parts.append("</tr>")
    parts.append("</table>")
    return "".join(parts)


def run_pipeline(user_input, period_option, max_news):
    """Run the full news-sentiment analysis for one stock and build the UI outputs.

    Args:
        user_input: Ticker symbol, or a ``"SYMBOL - Company"`` string.
        period_option: ``"Last 7 days"``, ``"Last 10 days"`` or
            ``"Last 1 month"``; any other value falls back to 7 days.
        max_news: Maximum number of headlines to fetch (converted with ``int``).

    Returns:
        A 7-tuple of strings: summary markdown, info markdown, daily-counts
        chart HTML, daily-sentiment chart HTML, headlines table HTML (with
        the CSV download link), price-vs-sentiment chart HTML, and the metrics
        legend markdown. When the input is invalid or no news is found, only
        the first item carries a message and the rest are empty strings.
    """
    user_input = (user_input or "").strip()
    if not user_input:
        return _empty_outputs("❌ Please enter a stock symbol.")

    # Extract symbol if user selected "SYMBOL - Company"
    raw_symbol = user_input.split(" - ")[0].strip()

    period_map = {"Last 7 days": 7, "Last 10 days": 10, "Last 1 month": 30}
    period_days = period_map.get(period_option, 7)
    max_news = int(max_news)

    valid, symbol, company = get_company_name(raw_symbol)
    if not valid:
        return _empty_outputs(
            f"❌ '{html.escape(user_input)}' is not a valid NSE stock symbol."
        )

    # fetch_news adds the "stock" keyword itself, so pass the bare company name.
    news, total_items = fetch_news(company, max_items=max_news)
    news = filter_news_by_period(news, period_days=period_days)
    fetched_count = min(total_items, max_news)

    if not news:
        return _empty_outputs(f"No news found for {company} in {period_option}")

    info_msg = f"**Showing {len(news)} headlines from the last {period_days} days (fetched {fetched_count} / requested {max_news}).**"

    # Sentiment
    sentiments = analyze_sentiment_batch([n["title"] for n in news])

    counts = dict.fromkeys(_SENTIMENT_CLASSES, 0)
    overall_sums = dict.fromkeys(_SENTIMENT_CLASSES, 0.0)
    weighted_counts = dict.fromkeys(_SENTIMENT_CLASSES, 0.0)
    date_sentiments = {}
    date_counts = {}
    dated_rows = []  # (parsed date, result row); the date is parsed only once

    for item, sent in zip(news, sentiments):
        pos, neu, neg = sent["positive"], sent["neutral"], sent["negative"]
        overall = round(pos - neg, 3)

        pred = max(_SENTIMENT_CLASSES, key=lambda k: sent[k])
        counts[pred] += 1
        overall_sums[pred] += overall
        for k in _SENTIMENT_CLASSES:
            weighted_counts[k] += sent[k]

        dt = parse_date(item["published"])
        if dt:
            dkey = dt.date().isoformat()
            date_sentiments.setdefault(dkey, []).append(overall)
            date_counts.setdefault(dkey, []).append(pred)

        dated_rows.append((dt, {
            "headline": item["title"],
            "positive": round(pos, 3),
            "neutral": round(neu, 3),
            "negative": round(neg, 3),
            "overall": overall,
            "published": item["published"],
            "link": item["link"],
        }))

    # Newest first; normalising the key avoids None / naive-vs-aware comparisons.
    dated_rows.sort(key=lambda pair: _naive_utc(pair[0]), reverse=True)
    results = [row for _, row in dated_rows]

    avg_overall = {
        k: (overall_sums[k] / counts[k] if counts[k] > 0 else 0.0) for k in counts
    }

    # -----------------------------
    # SUMMARY
    # -----------------------------
    summary_lines = [
        "",
        f"### 📊 Sentiment Summary for {company} ({symbol}) — {period_option}",
        "",
        "| Sentiment | Count | % | Avg Sentiment (Overall) | Weighted Count |",
        "|----------|-------|------|--------------|-----------|",
    ]
    for label, key in (("😊 Positive", "positive"), ("😐 Neutral", "neutral"), ("😞 Negative", "negative")):
        summary_lines.append(
            f"| {label} | {counts[key]} | {counts[key]/len(news)*100:.1f}% | {avg_overall[key]:.2f} | {weighted_counts[key]:.2f} |"
        )
    summary_lines += ["", f"**Headlines in period:** {len(news)}", ""]
    summary = "\n".join(summary_lines)

    # -----------------------------
    # HEADLINES TABLE + CSV DOWNLOAD
    # -----------------------------
    table = _build_headlines_table(results)

    csv_text = pd.DataFrame(results).to_csv(index=False)
    csv_data = "data:text/csv;base64," + base64.b64encode(csv_text.encode()).decode()
    csv_link_html = f"<a href='{csv_data}' download='{html.escape(str(symbol), quote=True)}_news_sentiment.csv'>⬇️ Download CSV</a>"

    # -----------------------------
    # CHARTS
    # -----------------------------
    chart_counts_html = chart_sentiment_html = chart_price_sentiment_html = ""

    if date_sentiments:
        dates = sorted(date_sentiments)
        avg_sentiments = [sum(date_sentiments[d]) / len(date_sentiments[d]) for d in dates]
        pos_counts = [date_counts[d].count("positive") for d in dates]
        neu_counts = [date_counts[d].count("neutral") for d in dates]
        neg_counts = [date_counts[d].count("negative") for d in dates]

        # Chart 1 - stacked daily headline counts
        fig1, ax1 = plt.subplots(figsize=(6, 4))
        ax1.bar(dates, pos_counts, color="green")
        ax1.bar(dates, neu_counts, bottom=pos_counts, color="gray")
        ax1.bar(dates, neg_counts, bottom=[p + n for p, n in zip(pos_counts, neu_counts)], color="red")
        ax1.set_xlabel("")
        ax1.set_title("Daily Headline Counts")
        ax1.tick_params(axis='x', rotation=60, labelsize=7)
        chart_counts_html = _figure_to_img_html(fig1)

        # Chart 2 - average daily sentiment
        fig2, ax2 = plt.subplots(figsize=(6, 4))
        colors = ["green" if x > 0 else "red" if x < 0 else "gray" for x in avg_sentiments]
        ax2.bar(dates, avg_sentiments, color=colors)
        ax2.axhline(0, color="black", linestyle="--")
        ax2.set_title("Daily Sentiment Trend")
        ax2.tick_params(axis='x', rotation=60, labelsize=7)
        chart_sentiment_html = _figure_to_img_html(fig2)

        # Chart 3 - Stock Price + Sentiment (best effort: depends on a remote price feed)
        fig3 = None
        try:
            ticker_data = yf.Ticker(symbol + ".NS").history(period=f"{period_days}d")

            if not ticker_data.empty:
                fig3, ax3 = plt.subplots(figsize=(6, 4))
                ax3.plot(ticker_data.index, ticker_data['Close'], color="blue", label="Close Price")

                sentiment_dates = [pd.to_datetime(d) for d in dates]
                ax3_twin = ax3.twinx()
                ax3_twin.plot(sentiment_dates, avg_sentiments, color="orange", marker="o", label="Sentiment Score")

                lines1, labels1 = ax3.get_legend_handles_labels()
                lines2, labels2 = ax3_twin.get_legend_handles_labels()
                ax3.legend(lines1 + lines2, labels1 + labels2, loc="upper left")

                ax3.set_title("Daily Stock Price + Sentiment Trend")
                ax3.xaxis.set_major_formatter(mdates.DateFormatter("%d-%b"))
                fig3.autofmt_xdate()

                chart_price_sentiment_html = _figure_to_img_html(fig3)
        except Exception:
            # The price chart is optional; show nothing rather than fail the run.
            chart_price_sentiment_html = ""
        finally:
            if fig3 is not None:
                # No-op if already closed; prevents a leak when plotting fails midway.
                plt.close(fig3)

    return (
        summary,
        info_msg,
        chart_counts_html,
        chart_sentiment_html,
        table + "<br>" + csv_link_html,
        chart_price_sentiment_html,
        _METRICS_SUMMARY_TEXT,
    )
352
-
353
- # -----------------------------
354
- # GRADIO UI
355
- # -----------------------------
356
# Gradio layout: inputs (left) and summary (right), then charts, the headlines
# table and the metrics legend.
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation was lost — confirm component placement against the rendered app.
with gr.Blocks(title="Indian Stock Market Sentiment Analyzer") as ui:

    gr.Markdown("<h1 style='text-align:center;'>🇮🇳 Indian Stock Market Sentiment Analyzer</h1>")
    gr.Markdown("<p style='text-align:center;'>Enter an NSE/BSE stock symbol. The app uses FinBERT + Google News to generate sentiment analysis of recent headlines.</p>")

    with gr.Row():
        with gr.Column(scale=1):
            symbol_in = gr.Textbox(label="Enter Stock Symbol (e.g., RELIANCE, TCS)")
            period_in = gr.Dropdown(
                ["Last 7 days", "Last 10 days", "Last 1 month"],
                value="Last 7 days",
                label="Select Period",
            )
            max_news_in = gr.Slider(
                minimum=20, maximum=100, step=1, value=50,
                label="Number of Headlines to Fetch",
            )
            btn = gr.Button("Analyze")

        with gr.Column(scale=2):
            summary_out = gr.Markdown()
            info_out = gr.Markdown()

    with gr.Row():
        chart1_out = gr.HTML(label="Daily Headline Counts")
        chart2_out = gr.HTML(label="Daily Sentiment Trend")
        chart3_out = gr.HTML(label="Daily Stock Price + Sentiment Trend")

    with gr.Row():
        table_out = gr.HTML(label="Headlines Table")

    # Add metrics summary in a separate row to make it clearly visible
    with gr.Row():
        metrics_summary_out = gr.Markdown(label="Key Metrics Summary")

    # The output order must match the tuple returned by run_pipeline.
    btn.click(
        run_pipeline,
        inputs=[symbol_in, period_in, max_news_in],
        outputs=[summary_out, info_out, chart1_out, chart2_out, table_out, chart3_out, metrics_summary_out],
    )

# Start the server only when executed as a script, so importing this module
# (e.g. from tests or tooling) does not block on a running web server.
if __name__ == "__main__":
    ui.launch(share=True)
398
-
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+ import torch
7
+ from datetime import datetime, timedelta
8
+ import yfinance as yf
9
+ import matplotlib.pyplot as plt
10
+ import matplotlib.dates as mdates
11
+ import io
12
+ import base64
13
+ import html
14
+
15
+ # -----------------------------
16
+ # LOAD LOCAL TICKERS
17
+ # -----------------------------
18
+ tickers = pd.read_csv("tickers.csv")
19
+
20
def get_company_name(symbol: str):
    """Resolve an NSE ticker symbol to its company name.

    The local ``tickers`` table is consulted first; if the symbol is not
    listed there, Yahoo Finance is queried for ``<SYMBOL>.NS``.

    Args:
        symbol: Ticker symbol as typed by the user (any case, may be padded
            with whitespace). ``None`` or an empty string is accepted.

    Returns:
        A ``(valid, symbol, company)`` tuple. On success ``valid`` is True and
        the other two items are the symbol and company name; otherwise the
        tuple is ``(False, None, None)``.
    """
    symbol = (symbol or "").upper().strip()
    if not symbol:
        # Nothing to look up; also avoids a pointless remote query for ".NS".
        return False, None, None

    row = tickers[tickers["Symbol"] == symbol]
    if not row.empty:
        return True, row.iloc[0]["Symbol"], row.iloc[0]["Company"]

    try:
        long_name = yf.Ticker(symbol + ".NS").info.get("longName")
    except Exception:
        # Deliberate best-effort fallback: any lookup failure is treated as
        # "unknown symbol" rather than surfaced to the UI.
        long_name = None

    # A present-but-empty name must not be reported as a valid company,
    # because callers concatenate it into the news search query.
    if long_name:
        return True, symbol, long_name
    return False, None, None
35
+
36
+ # -----------------------------
37
+ # FETCH NEWS
38
+ # -----------------------------
39
def fetch_news(query, max_items=50):
    """Fetch headlines for *query* from the Google News RSS search feed.

    Args:
        query: Free-text search term (typically a company name). The keyword
            "stock" is appended unless the term already ends with it.
        max_items: Maximum number of feed entries to return.

    Returns:
        A ``(news_list, total_items)`` tuple. ``news_list`` holds dicts with
        ``title``, ``link`` and ``published`` keys for at most ``max_items``
        entries; ``total_items`` is the number of ``<item>`` entries in the
        feed. ``([], 0)`` is returned when the HTTP request fails.
    """
    from urllib.parse import quote_plus

    search_term = str(query).strip()
    # Callers may already have appended "stock"; do not repeat the keyword.
    if search_term.lower().split()[-1:] != ["stock"]:
        search_term = f"{search_term} stock".strip()

    # Encode the term so spaces, "&" and similar characters in company names
    # cannot truncate or corrupt the query string.
    url = (
        "https://news.google.com/rss/search"
        f"?q={quote_plus(search_term)}&hl=en-IN&gl=IN&ceid=IN:en"
    )
    try:
        r = requests.get(url, timeout=5)
        r.raise_for_status()
    except requests.RequestException:
        return [], 0

    soup = BeautifulSoup(r.text, "lxml-xml")
    items = soup.find_all("item")

    def text_of(tag):
        # A feed entry may lack a child element; treat that as empty text.
        return tag.text if tag is not None else ""

    news_list = [
        {
            "title": text_of(item.title),
            "link": text_of(item.link),
            "published": text_of(item.pubDate),
        }
        for item in items[:max_items]
    ]
    return news_list, len(items)
59
+
60
+ # -----------------------------
61
+ # DATE PARSER
62
+ # -----------------------------
63
def parse_date(date_str):
    """Parse an RFC 2822 date string (the RSS ``pubDate`` format).

    Args:
        date_str: Text such as ``"Mon, 01 Jan 2024 10:00:00 GMT"`` or the same
            form with a numeric offset (``+0530``).

    Returns:
        A naive ``datetime`` expressed in UTC, or ``None`` when the value is
        empty, not a string, or cannot be parsed. Returning a single
        (naive, UTC) flavour keeps the results mutually comparable/sortable.
    """
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    if not date_str or not isinstance(date_str, str):
        return None
    try:
        parsed = parsedate_to_datetime(date_str)
    except (TypeError, ValueError, IndexError):
        # Older Python versions raise TypeError for unparsable input,
        # newer ones raise ValueError.
        return None

    if parsed.tzinfo is not None:
        # Convert to UTC *before* dropping the offset.
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed
73
+
74
+ # -----------------------------
75
+ # FILTER NEWS BY PERIOD
76
+ # -----------------------------
77
def filter_news_by_period(news_list, period_days=7):
    """Keep only the news items published within the last *period_days* days.

    Args:
        news_list: Iterable of dicts; each item's ``published`` value is
            parsed with ``parse_date``. Items whose date is missing or
            unparsable are dropped.
        period_days: Size of the look-back window in days, counted back from
            the current UTC time.

    Returns:
        A new list with the items inside the window, in input order.
    """
    from datetime import timezone

    # Naive UTC "now" (datetime.utcnow() is deprecated since Python 3.12).
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff = now_utc - timedelta(days=period_days)

    filtered = []
    for item in news_list:
        dt = parse_date(item.get("published", ""))
        if dt is None:
            continue
        if dt.tzinfo is not None:
            # Convert to UTC before dropping the offset; merely stripping the
            # tzinfo would compare local wall-clock time against a UTC cutoff.
            dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
        if dt >= cutoff:
            filtered.append(item)
    return filtered
85
+
86
+ # -----------------------------
87
+ # LOAD FINBERT
88
+ # -----------------------------
89
+ tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
90
+ model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
91
+ labels = [model.config.id2label[i] for i in range(model.config.num_labels)]
92
+
93
def analyze_sentiment_batch(texts):
    """Score a batch of texts with the module-level FinBERT model.

    Args:
        texts: Sequence of strings (headlines) to classify.

    Returns:
        One dict per input text, in order, holding the ``positive``,
        ``neutral`` and ``negative`` softmax probabilities as floats.
        An empty input yields an empty list.
    """
    if len(texts) == 0:
        # Avoid invoking the tokenizer/model on an empty batch.
        return []

    batch = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        output = model(**batch)
        probs = torch.nn.functional.softmax(output.logits, dim=1)

    # Resolve each class's column once instead of once per row.
    column = {name: labels.index(name) for name in ("positive", "neutral", "negative")}
    return [
        {name: float(row[idx]) for name, idx in column.items()}
        for row in probs.tolist()
    ]
107
+
108
+ # -----------------------------
109
+ # MAIN PIPELINE
110
+ # -----------------------------
111
_SENTIMENT_CLASSES = ("positive", "neutral", "negative")
_SENTIMENT_COLORS = {"positive": "green", "neutral": "gray", "negative": "red"}

# Static legend shown under the results; it does not depend on the query.
_METRICS_SUMMARY_TEXT = "\n".join([
    "",
    "### 📌 Key Metrics Summary",
    "",
    "| Metric | Description |",
    "|--------|------------|",
    "| **Count** | Number of headlines predicted as Positive / Neutral / Negative. |",
    "| **Weighted Count** | Sum of the raw sentiment probabilities for each category across all headlines. Provides a “confidence-weighted” measure of sentiment dominance. |",
    "| **Sentiment Score (per headline)** | `positive − negative` probability. Shows whether the headline is more positive or negative. |",
    "| **Dominant Sentiment (per headline)** | The sentiment with the **highest probability** among positive, neutral, or negative. Highlighted in the table. |",
    "",
    "**Charts:**",
    "- **Daily Headline Counts**: Shows how many headlines per day fall into each sentiment.",
    # The minus sign was missing here ("positive negative").
    "- **Daily Sentiment Trend**: Shows average overall sentiment per day (positive − negative).",
    "- **Daily Stock Price + Sentiment Trend**: Plots stock closing price alongside daily sentiment for easy correlation.",
    "",
])


def _empty_outputs(message):
    """Return the seven-slot output tuple with only the first slot filled."""
    return (message, "", "", "", "", "", "")


def _naive_utc(dt):
    """Return *dt* as a naive UTC datetime; ``None`` sorts as the oldest value."""
    from datetime import timezone

    if dt is None:
        return datetime.min
    if dt.tzinfo is not None:
        return dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt


def _figure_to_img_html(fig):
    """Render *fig* as an inline base64 PNG ``<img>`` tag and close the figure."""
    try:
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, even when rendering fails.
        plt.close(fig)
    encoded = base64.b64encode(buf.getvalue()).decode()
    return f"<img src='data:image/png;base64,{encoded}' style='width:100%; max-width:700px;'/>"


def _build_headlines_table(results):
    """Build the HTML headlines table, highlighting each row's dominant sentiment."""
    parts = [
        "<table style='width:100%; border-collapse: collapse;'>",
        "<tr style='background-color:#f2f2f2'>",
        "<th>Published</th>",
        "<th>Headline</th>",
        "<th>Positive</th>",
        "<th>Neutral</th>",
        "<th>Negative</th>",
        "<th>Sentiment Score<br>(POS-NEG)</th>",
        "<th>Link</th>",
        "</tr>",
    ]
    for i, r in enumerate(results):
        row_color = "#ffffff" if i % 2 == 0 else "#f9f9f9"
        dominant = max(_SENTIMENT_CLASSES, key=lambda k: r[k])
        overall_color = "green" if r["overall"] > 0 else "red" if r["overall"] < 0 else "black"

        # Feed content is untrusted: escape text and only allow http(s) links.
        link = str(r["link"])
        if not link.startswith(("http://", "https://")):
            link = "#"

        parts.append(f"<tr style='background-color:{row_color};'>")
        parts.append(f"<td>{html.escape(str(r['published']))}</td>")
        parts.append(f"<td>{html.escape(r['headline'])}</td>")
        for kind in _SENTIMENT_CLASSES:
            if kind == dominant:
                parts.append(
                    f"<td style='color:{_SENTIMENT_COLORS[kind]}; font-weight:bold'>{r[kind]}</td>"
                )
            else:
                parts.append(f"<td>{r[kind]}</td>")
        parts.append(f"<td style='color:{overall_color}; font-weight:bold'>{r['overall']}</td>")
        parts.append(
            f"<td><a href='{html.escape(link, quote=True)}' target='_blank'>Open</a></td>"
        )
        parts.append("</tr>")
    parts.append("</table>")
    return "".join(parts)


def run_pipeline(user_input, period_option, max_news):
    """Run the full news-sentiment analysis for one stock and build the UI outputs.

    Args:
        user_input: Ticker symbol, or a ``"SYMBOL - Company"`` string.
        period_option: ``"Last 7 days"``, ``"Last 10 days"`` or
            ``"Last 1 month"``; any other value falls back to 7 days.
        max_news: Maximum number of headlines to fetch (converted with ``int``).

    Returns:
        A 7-tuple of strings: summary markdown, info markdown, daily-counts
        chart HTML, daily-sentiment chart HTML, headlines table HTML (with
        the CSV download link), price-vs-sentiment chart HTML, and the metrics
        legend markdown. When the input is invalid or no news is found, only
        the first item carries a message and the rest are empty strings.
    """
    user_input = (user_input or "").strip()
    if not user_input:
        return _empty_outputs("❌ Please enter a stock symbol.")

    # Extract symbol if user selected "SYMBOL - Company"
    raw_symbol = user_input.split(" - ")[0].strip()

    period_map = {"Last 7 days": 7, "Last 10 days": 10, "Last 1 month": 30}
    period_days = period_map.get(period_option, 7)
    max_news = int(max_news)

    valid, symbol, company = get_company_name(raw_symbol)
    if not valid:
        return _empty_outputs(
            f"❌ '{html.escape(user_input)}' is not a valid NSE stock symbol."
        )

    # fetch_news adds the "stock" keyword itself, so pass the bare company name.
    news, total_items = fetch_news(company, max_items=max_news)
    news = filter_news_by_period(news, period_days=period_days)
    fetched_count = min(total_items, max_news)

    if not news:
        return _empty_outputs(f"No news found for {company} in {period_option}")

    info_msg = f"**Found {len(news)} headlines from the last {period_days} days (fetched {fetched_count} / requested {max_news}).**"

    # Sentiment
    sentiments = analyze_sentiment_batch([n["title"] for n in news])

    counts = dict.fromkeys(_SENTIMENT_CLASSES, 0)
    weighted_counts = dict.fromkeys(_SENTIMENT_CLASSES, 0.0)
    date_sentiments = {}
    date_counts = {}
    dated_rows = []  # (parsed date, result row); the date is parsed only once

    for item, sent in zip(news, sentiments):
        pos, neu, neg = sent["positive"], sent["neutral"], sent["negative"]
        overall = round(pos - neg, 3)

        pred = max(_SENTIMENT_CLASSES, key=lambda k: sent[k])
        counts[pred] += 1
        for k in _SENTIMENT_CLASSES:
            weighted_counts[k] += sent[k]

        dt = parse_date(item["published"])
        if dt:
            dkey = dt.date().isoformat()
            date_sentiments.setdefault(dkey, []).append(overall)
            date_counts.setdefault(dkey, []).append(pred)

        dated_rows.append((dt, {
            "headline": item["title"],
            "positive": round(pos, 3),
            "neutral": round(neu, 3),
            "negative": round(neg, 3),
            "overall": overall,
            "published": item["published"],
            "link": item["link"],
        }))

    # Newest first; normalising the key avoids None / naive-vs-aware comparisons.
    dated_rows.sort(key=lambda pair: _naive_utc(pair[0]), reverse=True)
    results = [row for _, row in dated_rows]

    # -----------------------------
    # SUMMARY
    # -----------------------------
    summary_lines = [
        "",
        f"### 📊 Sentiment Summary for {company} ({symbol}) — {period_option}",
        "",
        "| Sentiment | Count | Weighted Count |",
        "|----------|-------|-----------|",
    ]
    for label, key in (("😊 Positive", "positive"), ("😐 Neutral", "neutral"), ("😞 Negative", "negative")):
        summary_lines.append(f"| {label} | {counts[key]} | {weighted_counts[key]:.2f} |")
    summary_lines += [f"| Total | {len(news)} | {len(news)} |", "", ""]
    summary = "\n".join(summary_lines)

    # -----------------------------
    # HEADLINES TABLE + CSV DOWNLOAD
    # -----------------------------
    table = _build_headlines_table(results)

    csv_text = pd.DataFrame(results).to_csv(index=False)
    csv_data = "data:text/csv;base64," + base64.b64encode(csv_text.encode()).decode()
    csv_link_html = f"<a href='{csv_data}' download='{html.escape(str(symbol), quote=True)}_news_sentiment.csv'>⬇️ Download CSV</a>"

    # -----------------------------
    # CHARTS
    # -----------------------------
    chart_counts_html = chart_sentiment_html = chart_price_sentiment_html = ""

    if date_sentiments:
        dates = sorted(date_sentiments)
        avg_sentiments = [sum(date_sentiments[d]) / len(date_sentiments[d]) for d in dates]
        pos_counts = [date_counts[d].count("positive") for d in dates]
        neu_counts = [date_counts[d].count("neutral") for d in dates]
        neg_counts = [date_counts[d].count("negative") for d in dates]

        # Chart 1 - stacked daily headline counts
        fig1, ax1 = plt.subplots(figsize=(6, 3))
        ax1.bar(dates, pos_counts, color="green")
        ax1.bar(dates, neu_counts, bottom=pos_counts, color="gray")
        ax1.bar(dates, neg_counts, bottom=[p + n for p, n in zip(pos_counts, neu_counts)], color="red")
        ax1.set_xlabel("")
        ax1.set_title("Daily Headline Counts")
        ax1.tick_params(axis='x', rotation=60, labelsize=7)
        chart_counts_html = _figure_to_img_html(fig1)

        # Chart 2 - average daily sentiment
        fig2, ax2 = plt.subplots(figsize=(6, 3))
        colors = ["green" if x > 0 else "red" if x < 0 else "gray" for x in avg_sentiments]
        ax2.bar(dates, avg_sentiments, color=colors)
        ax2.axhline(0, color="black", linestyle="--")
        ax2.set_title("Daily Sentiment Trend")
        ax2.tick_params(axis='x', rotation=60, labelsize=7)
        chart_sentiment_html = _figure_to_img_html(fig2)

        # Chart 3 - Stock Price + Sentiment (best effort: depends on a remote price feed)
        fig3 = None
        try:
            ticker_data = yf.Ticker(symbol + ".NS").history(period=f"{period_days}d")

            if not ticker_data.empty:
                fig3, ax3 = plt.subplots(figsize=(6, 3))
                ax3.plot(ticker_data.index, ticker_data['Close'], color="blue", marker="o", label="Close Price")

                sentiment_dates = [pd.to_datetime(d) for d in dates]
                ax3_twin = ax3.twinx()
                ax3_twin.plot(sentiment_dates, avg_sentiments, color="orange", marker="o", label="Sentiment Score")

                lines1, labels1 = ax3.get_legend_handles_labels()
                lines2, labels2 = ax3_twin.get_legend_handles_labels()
                ax3.legend(lines1 + lines2, labels1 + labels2, loc="upper left")

                ax3.set_title("Daily Stock Price + Sentiment Trend")
                ax3.xaxis.set_major_formatter(mdates.DateFormatter("%d-%b"))
                fig3.autofmt_xdate()

                chart_price_sentiment_html = _figure_to_img_html(fig3)
        except Exception:
            # The price chart is optional; show nothing rather than fail the run.
            chart_price_sentiment_html = ""
        finally:
            if fig3 is not None:
                # No-op if already closed; prevents a leak when plotting fails midway.
                plt.close(fig3)

    return (
        summary,
        info_msg,
        chart_counts_html,
        chart_sentiment_html,
        table + "<br>" + csv_link_html,
        chart_price_sentiment_html,
        _METRICS_SUMMARY_TEXT,
    )
347
+
348
+ # -----------------------------
349
+ # GRADIO UI
350
+ # -----------------------------
351
# Gradio layout: one row of inputs, a status line, summary next to the counts
# chart, the two trend charts, the headlines table and the metrics legend.
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation was lost — in particular the "Analyze" button is assumed to sit
# under the slider in the third column; confirm against the rendered app.
with gr.Blocks(title="Indian Stock Market Sentiment Analyzer") as ui:

    gr.Markdown("<h1 style='text-align:center;'>🇮🇳 Indian Stock Market Sentiment Analyzer</h1>")
    gr.Markdown("<p style='text-align:center;'>Enter an NSE/BSE stock symbol. The app uses FinBERT + Google News to generate sentiment analysis of recent headlines.</p>")

    with gr.Row():
        with gr.Column(scale=1):
            symbol_in = gr.Textbox(label="Enter Stock Symbol (e.g., RELIANCE, TCS)")
        with gr.Column(scale=1):
            period_in = gr.Dropdown(
                ["Last 7 days", "Last 10 days", "Last 1 month"],
                value="Last 7 days",
                label="Select Period",
            )
        with gr.Column(scale=1):
            max_news_in = gr.Slider(
                minimum=20, maximum=100, step=1, value=50,
                label="Number of Headlines to Fetch",
            )
            btn = gr.Button("Analyze")

    with gr.Row():
        info_out = gr.Markdown()

    with gr.Row():
        summary_out = gr.Markdown()
        chart1_out = gr.HTML(label="Daily Headline Counts")

    with gr.Row():
        chart2_out = gr.HTML(label="Daily Sentiment Trend")
        chart3_out = gr.HTML(label="Daily Stock Price + Sentiment Trend")

    with gr.Row():
        table_out = gr.HTML(label="Headlines Table")

    # Add metrics summary in a separate row to make it clearly visible
    with gr.Row():
        metrics_summary_out = gr.Markdown(label="Key Metrics Summary")

    # The output order must match the tuple returned by run_pipeline.
    btn.click(
        run_pipeline,
        inputs=[symbol_in, period_in, max_news_in],
        outputs=[summary_out, info_out, chart1_out, chart2_out, table_out, chart3_out, metrics_summary_out],
    )

# Start the server only when executed as a script, so importing this module
# (e.g. from tests or tooling) does not block on a running web server.
if __name__ == "__main__":
    ui.launch()
397
+