# hf_diag.py
# Minimal diagnostic for the Hugging Face InferenceClient chat API:
# makes one non-streaming chat_completion call and inspects the response.
from huggingface_hub import InferenceClient
import os, json

TOK = os.getenv("HF_TOKEN") or "paste-your-token-here"
MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"  # try both your primary and fallback model names

client = InferenceClient(token=TOK)

def try_chat():
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello and return two short code files in JSON: files mapping with main.py and tests/test_main.py"},
    ]
    try:
        print("Calling chat_completion (non-stream)...")
        # chat_completion takes max_tokens; max_new_tokens belongs to text_generation
        resp = client.chat_completion(messages=messages, model=MODEL, stream=False, max_tokens=256)
        print("TYPE:", type(resp))
        try:
            print("As repr:", repr(resp)[:1000])
        except Exception:
            print("Couldn't repr resp")
        # Try to JSON-dump the response; the ChatCompletionOutput dataclass
        # is not directly serializable, so this is expected to report why.
        try:
            print("JSON-able?", json.dumps(resp)[:1000])
        except Exception as e:
            print("Not JSON serializable:", e)
    except Exception as e:
        print("chat_completion failed:", e)
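
# --- Streaming variant (a sketch, not part of the original diagnostic) ---
# Assuming the non-streaming call above works, the generated text lives at
# resp.choices[0].message.content. The helper below streams the same kind of
# request; each chunk is a ChatCompletionStreamOutput whose incremental text
# is chunk.choices[0].delta.content. The prompt and max_tokens value here are
# placeholders chosen for illustration.

def try_chat_streaming():
    messages = [{"role": "user", "content": "Say hello."}]
    try:
        print("Calling chat_completion (stream)...")
        for chunk in client.chat_completion(
            messages=messages, model=MODEL, stream=True, max_tokens=64
        ):
            delta = chunk.choices[0].delta.content
            if delta:  # delta can be None on some chunks
                print(delta, end="", flush=True)
        print()
    except Exception as e:
        print("streaming chat_completion failed:", e)

if __name__ == "__main__":
    try_chat()
    # Uncomment to also exercise the streaming path:
    # try_chat_streaming()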