spec:
  entry_points:
    # Entry point `open_mpi_handler`: two parameters (`worker_inputs` and
    # `root_worker_inputs`, the latter with a null default); no docstring recorded.
    open_mpi_handler:
      name: open_mpi_handler
      doc: ''
      # presumably the definition's line number in the origin source — confirm
      lineno: 56
      parameters:
      - name: worker_inputs
        type: List[str]
      - name: root_worker_inputs
        type: Dict[str, Any]
        default: null
      has_varargs: false
      has_kwargs: false
    # Entry point `decorator`: a single untyped `handler` parameter; no
    # docstring recorded.
    decorator:
      name: decorator
      doc: ''
      lineno: 68
      parameters:
      - name: handler
      has_varargs: false
      has_kwargs: false
    # Entry point `wrapper`: no named parameters listed; only `has_kwargs` is set.
    wrapper:
      name: wrapper
      doc: ''
      lineno: 73
      has_varargs: false
      has_kwargs: true
    # Entry point `translate` (also the spec's `default_handler`): translates
    # text files with a HuggingFace model. Multi-line descriptions use block
    # scalars so the text reads exactly as it parses.
    translate:
      name: translate
      doc: |-
        Translate text files using a transformer model from Huggingface's hub according to the source and target languages
        given (or using the directly provided model name). The end result is a directory of translated text files and a
        dataframe containing the following columns:

        * text_file - The text file path.
        * translation_file - The translation text file name in the output directory.
      lineno: 135
      parameters:
      - name: data_path
        type: Union[str, List[str], Path]
        doc: A directory of text files or a single file or a list of files to translate.
      - name: output_directory
        type: str
        doc: Directory where the translated files will be saved.
      - name: model_name
        type: str
        doc: >-
          The name of a model to load. If None, the model name is constructed using
          the source and target languages parameters.
        default: null
      - name: source_language
        type: str
        doc: The source language code (e.g., 'en' for English).
        default: null
      - name: target_language
        type: str
        doc: The target language code (e.g., 'en' for English).
        default: null
      - name: device
        type: str
        doc: The device index for transformers. Default will prefer cuda if available.
        default: null
      - name: model_kwargs
        type: dict
        doc: >-
          Keyword arguments to pass regarding the loading of the model in HuggingFace's
          `pipeline` function.
        default: null
      - name: batch_size
        type: int
        # Wording fixed: this is a batch size, not a count of batches.
        doc: >-
          The batch size to use in translation. The files are translated
          one by one, but the sentences can be batched.
        default: 1
      - name: translation_kwargs
        type: dict
        doc: >-
          Additional keyword arguments to pass to a `transformers.TranslationPipeline`
          when doing the translation inference. Notice the batch size here is being
          added automatically.
        default: null
      - name: verbose
        type: bool
        # Description aligned with the recorded default below (was "Default: True.").
        doc: 'Whether to present logs of a progress bar and errors. Default: False.'
        default: false
      outputs:
      # NOTE(review): this description looks truncated (it ends at "A tuple of:");
      # the tuple members are not listed here — restore them from the handler's
      # docstring if available.
      - type: Tuple[str, pd.DataFrame, dict]
        doc: 'A tuple of:'
      has_varargs: false
      has_kwargs: false
  # Build settings: base image, requirements list and source-origin fields.
  build:
    base_image: mlrun/mlrun
    requirements:
    - transformers
    - sentencepiece
    - torch
    - tqdm
    # NOTE(review): no embedded source is present. Written as an explicit null
    # (identical parsed value to the previous bare, whitespace-trailed key).
    # Presumably this should carry the encoded handler source — confirm.
    functionSourceCode: null
    code_origin: ''
    origin_filename: ''
  # Function-level settings: description and default handler first, then the
  # image/command strings (both empty here) and the auto-mount switch.
  description: Translate text files from one language to another
  default_handler: translate
  image: ''
  command: ''
  disable_auto_mount: false
# Top-level identity of this function: its kind, then its metadata.
kind: job
metadata:
  name: translate
  tag: ''
  categories:
  - genai
  - NLP
# Top-level flag, distinct from the `verbose` parameter of the `translate` handler.
verbose: false