Source code for bio_embeddings.embed.prottrans_albert_bfd_embedder

from pathlib import Path

from transformers import AlbertModel, AlbertTokenizer

from bio_embeddings.embed.prottrans_base_embedder import ProtTransBertBaseEmbedder


class ProtTransAlbertBFDEmbedder(ProtTransBertBaseEmbedder):
    """ProtTrans-Albert-BFD Embedder (ProtAlbert-BFD)

    Elnaggar, Ahmed, et al. "ProtTrans: Towards Cracking the Language of
    Life's Code Through Self-Supervised Deep Learning and High Performance
    Computing." arXiv preprint arXiv:2007.06225 (2020).
    https://arxiv.org/abs/2007.06225
    """

    _model: AlbertModel
    name = "prottrans_albert_bfd"
    embedding_dimension = 4096
    number_of_layers = 1
    def __init__(self, **kwargs):
        """Initialize Albert embedder.

        :param model_directory: directory containing the pretrained model
            weights and the albert_vocab_model.model vocabulary file
        :param half_precision_model: if True, load the model weights in half
            precision (float16)
        """
        super().__init__(**kwargs)
        self._model_directory = self._options["model_directory"]
        self._half_precision_model = self._options.get("half_precision_model", False)

        # Load the pretrained Albert model from the local model directory
        self._model = AlbertModel.from_pretrained(self._model_directory)
        # Compute in half precision, which is a lot faster and saves us half the memory
        if self._half_precision_model:
            self._model = self._model.half()
        self._model = self._model.eval().to(self._device)
        self._model_fallback = None
        self._tokenizer = AlbertTokenizer(
            str(Path(self._model_directory) / "albert_vocab_model.model"),
            do_lower_case=False,
        )
    def _get_fallback_model(self) -> AlbertModel:
        """Returns the CPU model, loading it lazily on first use"""
        if not self._model_fallback:
            self._model_fallback = AlbertModel.from_pretrained(
                self._model_directory
            ).eval()
        return self._model_fallback
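
For orientation, here is a minimal usage sketch. It assumes a local directory (the placeholder "model-dir") holding the pretrained ProtAlbert-BFD weights together with the albert_vocab_model.model vocabulary file, and that the base embedder classes expose the usual bio_embeddings embed and reduce_per_protein methods; neither assumption is verified by this file.

# Minimal usage sketch (not part of this module). "model-dir" is a
# placeholder path; embed() and reduce_per_protein() are assumed to come
# from the base embedder classes, as elsewhere in bio_embeddings.
from bio_embeddings.embed import ProtTransAlbertBFDEmbedder

embedder = ProtTransAlbertBFDEmbedder(
    model_directory="model-dir",  # placeholder path to the downloaded model
    half_precision_model=True,    # float16: faster, half the GPU memory
)

sequence = "SEQWENCE"
per_residue = embedder.embed(sequence)  # one 4096-d vector per residue
per_protein = embedder.reduce_per_protein(per_residue)  # single 4096-d vector

Note that _get_fallback_model above lazily builds a full-precision CPU copy of the model; in bio_embeddings this fallback is presumably used when embedding on the GPU fails (for example, when a long sequence exhausts GPU memory), so the loading cost is paid once on the first fallback and the copy is reused afterwards.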