AWS Inferentia only: this Neuron-compiled model can only be loaded and run on AWS Inferentia accelerators; it will not run on CPU or GPU.

from transformers import AutoTokenizer
from optimum.neuron import NeuronModelForFeatureExtraction


# 1. Export: compile the model ahead of time for Neuron devices.
# Neuron compilation requires static input shapes, fixed at export time.
input_shapes = {"batch_size": 1, "sequence_length": 128}
# Cast matmul operations to bf16 for faster inference at a small precision cost.
compiler_args = {"auto_cast": "matmul", "auto_cast_type": "bf16"}
neuron_model = NeuronModelForFeatureExtraction.from_pretrained(
    "google-bert/bert-base-uncased",
    export=True,
    **input_shapes,
    **compiler_args,
)
# Save locally
neuron_model.save_pretrained("bert_feature_extraction_neuronx")
neuron_model.push_to_hub(
    "bert_feature_extraction_neuronx",  # local save directory from above
    repository_id="optimum/bert-base-uncased-neuronx-bs1-sq128",  # replace with your HF Hub repo id
)

# 2. Inference
tokenizer = AutoTokenizer.from_pretrained("optimum/bert-base-uncased-neuronx-bs1-sq128")
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
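# Optional, more cautious variant: pad/truncate explicitly to the compiled
# static shape using standard transformers tokenizer arguments:
#   inputs = tokenizer("Hello, my dog is cute", padding="max_length",
#                      max_length=128, truncation=True, return_tensors="pt")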
outputs = neuron_model(**inputs)
last_hidden_states = outputs.last_hidden_state
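
The last_hidden_state tensor has shape (batch_size, sequence_length, hidden_size). For feature extraction, a common next step is to mean-pool the token embeddings into a single sentence vector; a minimal sketch using the attention mask to ignore padding:

import torch

# Zero out padding positions, then average the remaining token embeddings.
mask = inputs["attention_mask"].unsqueeze(-1)  # (batch, seq_len, 1)
sentence_embedding = (last_hidden_states * mask).sum(dim=1) / mask.sum(dim=1)
print(sentence_embedding.shape)  # torch.Size([1, 768]) for bert-base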