python source code of sge

anvio-master
- .github
  - issue_template.md
- conda-recipe
  - anvio
    - meta.yaml
  - anvio-minimal
    - meta.yaml
- anvio
  - metapanops.py
  - clusteringconfuguration.py
  - genomedescriptions.py
  - db.py
  - genomestorage.py
  - ccollections.py
  - parsers
    - concoct.py
    - interproscan.py
    - krakenuniq.py
    - defaultmatrix.py
    - __init__.py
    - kaiju.py
    - centrifuge.py
    - hmmscan.py
    - base.py
  - bamops.py
  - hmmops.py
  - kmers.py
  - filesnpaths.py
  - variabilityops.py
  - terminal.py
  - fastalib.py
  - mcgclassifier.py
  - auxiliarydataops.py
  - synteny.py
  - tables
    - genelevelcoverages.py
    - indels.py
    - genecalls.py
    - states.py
    - kmers.py
    - ntpositions.py
    - genefunctions.py
    - trnahits.py
    - taxonomy.py
    - collections.py
    - miscdata.py
    - views.py
    - scgtaxonomy.py
    - __init__.py
    - codonfrequencies.py
    - geneclusters.py
    - tableops.py
    - hmmhits.py
    - variability.py
    - contigsplitinfo.py
  - merger.py
  - scgtaxonomyops.py
  - pfam.py
  - drivers
    - famsa.py
    - hmmer.py
    - trnscan_se.py
    - emapper.py
    - mcl.py
    - concoct.py
    - muscle.py
    - sourmash.py
    - maxbin2.py
    - pyani.py
    - fastani.py
    - diamond.py
    - fasttree.py
    - blast.py
    - binsanity.py
    - __init__.py
    - dastool.py
    - MODELLER.py
    - prodigal.py
    - metabat2.py
  - homogeneityindex.py
  - bottleroutes.py
  - cogs.py
  - splitter.py
  - summarizer.py
  - serverAPI.py
  - profiler.py
  - sge.py
  - constants.py
  - data
    - misc
      - MODELLER
        db
        .gitkeep
        scripts
        fasta_to_pir.py
        search.py
        add_chain_identifiers_to_best_model.py
        binarize_database.py
        align_to_templates.py
        get_model.py
        pir_to_fasta.py
      - SCG_TAXONOMY
        GTDB
        SCG_SEARCH_DATABASES
        VERSION
        __init.py__
      - SCGDOMAINCLASSIFIER.md
      - SCGDOMAINCLASSIFIER.rf
    - __init__.py
    - seq_transition_models
      - AA
        third_order.npy
        first_order.npy
        second_order.npy
      - README.md
    - SSMs
      - AA
        BLOSUM62.txt
        BLOSUM90.txt
        __init__.py
      - CDN
        __init__.py
      - __init__.py
      - README.md
      - NT
        __init__.py
    - hmm
      - Ribosomal_RNAs
        genes.txt
        noise_cutoff_terms.txt
        reference.txt
        kind.txt
        target.txt
        README.md
      - Archaea_76
        genes.txt
        noise_cutoff_terms.txt
        reference.txt
        kind.txt
        target.txt
      - Bacteria_71
        genes.txt
        noise_cutoff_terms.txt
        reference.txt
        readme.txt
        kind.txt
        target.txt
      - Protista_83
        genes.txt
        noise_cutoff_terms.txt
        reference.txt
        kind.txt
        target.txt
      - __init__.py
    - interactive
      - images
        blank.gif
        loading.gif
        drag.gif
        spinning_dna.gif
        custom_indic.gif
      - geneclusters.html
      - lib
        bootstrap
        bootstrap-waitingfor
        jquery-ui
        marked
        bootstrap-markdown
        svg-crowbar
        LICENSE
        svg-crowbar.js
        colpick
        LICENSE
        colpick.css
        colpick.js
        treelib-js
        LICENSE
        bootstrap-sortable
        toastr
        randomColor
        jquery
        JavaScript-MD5
        jquery-svgpan
        LICENSE
        jquery-svgpan.js
        d3.js
      - metabolism.html
      - contigs.html
      - js
        dialogs
        save-tree.js
        bigsi.js
        collapse-node.js
        store-collection.js
        load-collection.js
        search.js
        sample.js
        geneclusters.js
        help-messages.js
        charts.js
        context-menu.js
        migrations.js
        contigs-plot.js
        news.js
        main.js
        svg-helpers.js
        area-zoom.js
        bin.js
        animations.js
        drawer.js
        color-coding.js
        tree.js
        utils.js
        mouse-events.js
        structure.js
        multiple.js
        inspectionutils.js
        constants.js
      - index.html
      - structure.html
      - css
        glowing-button.css
        markdown-local.css
        main.css
        loading.css
        contigs-plot.css
        popover.css
        charts.css
        normalize.css
        geneclusters.css
        structure.css
      - charts.html
      - login.html
    - clusterconfigs
      - merged
        tnf
        cov
        tnf-cov
      - pan
        frequency
        presence-absence
      - blank
        tnf
        tnf-splits
      - single
        tnf
        tnf-ab-cov
      - README.md
    - static
      - content
        chartsjs
        LICENSE
        README
        bootstrap-sortable
        pics
        fonts
        glyphicons-halflings-regular.woff
        glyphicons-halflings-regular.eot
        glyphicons-halflings-regular.woff2
        glyphicons-halflings-regular.ttf
        glyphicons-halflings-regular.svg
        js
        bootstrap.min.js
        jquery.min.js
        pako.min.js
        css
        anvio.css
        bootstrap.css
      - template
        vignette.tmpl
        profile-index-mini.tmpl
        profile-index.tmpl
        saavs-index.tmpl
        artifact.tmpl
        program.tmpl
        pan-index.tmpl
        programs_and_artifacts_index.tmpl
  - contigops.py
  - mcgops.py
  - __init__.py
  - hmmopswrapper.py
  - workflows
    - phylogenomics
      - Snakefile
      - __init__.py
    - pangenomics
      - Snakefile
      - __init__.py
    - metagenomics
      - Snakefile
      - __init__.py
    - __init__.py
    - contigs
      - Snakefile
      - __init__.py
  - genomesimilarity.py
  - utils.py
  - migrations
    - genomestorage
      - v4_to_v5.py
      - v6_to_v7.py
      - v5_to_v6.py
      - v3_to_v4.py
      - __init__.py
    - pan
      - v4_to_v5.py
      - v6_to_v7.py
      - v8_to_v9.py
      - v5_to_v6.py
      - v11_to_v12.py
      - v7_to_v8.py
      - v13_to_v14.py
      - __init__.py
      - v9_to_v10.py
      - v12_to_v13.py
      - v10_to_v11.py
    - genes
      - v4_to_v5.py
      - v5_to_v6.py
      - __init__.py
    - config
      - v0_to_v1.py
      - __init__.py
    - structure
      - v1_to_v2.py
    - __init__.py
    - contigs
      - v16_to_v17.py
      - v14_to_v15.py
      - v6_to_v7.py
      - v8_to_v9.py
      - v5_to_v6.py
      - v11_to_v12.py
      - v15_to_v16.py
      - v7_to_v8.py
      - v13_to_v14.py
      - v17_to_v18.py
      - __init__.py
      - v9_to_v10.py
      - v12_to_v13.py
      - v10_to_v11.py
    - profile
      - v16_to_v17.py
      - v28_to_v29.py
      - v21_to_v22.py
      - v14_to_v15.py
      - v19_to_v20.py
      - v31_to_v32.py
      - v30_to_v31.py
      - v15_to_v16.py
      - v18_to_v19.py
      - v13_to_v14.py
      - v17_to_v18.py
      - v27_to_v28.py
      - v20_to_v21.py
      - v32_to_v33.py
      - v25_to_v26.py
      - v23_to_v24.py
      - v33_to_v34.py
      - __init__.py
      - v24_to_v25.py
      - v29_to_v30.py
      - v22_to_v23.py
      - v26_to_v27.py
    - modules
      - v1_to_v2.py
      - __init__.py
  - dictio.py
  - tests
    - run_gen_gene_level_stats_tests.sh
    - test-mcg-classifier
      - config.json
      - mcg-classifier.snakefile
    - run_workflows_tests.sh
    - 00.sh
    - run_metagenomics_workflow_tests.sh
    - run_mini_test.sh
    - run_metapangenomics_workflow_tests.sh
    - run_metabolism_tests.sh
    - run_all_tests.sh
    - misc
      - bowtie_batch_single_fasta.sh
      - bowtie_batch_paried.sh
    - run_phylogenomics_workflow_tests.sh
    - run_manual_interactive.sh
    - run_pangenomics_workflow_tests.sh
    - run_scg_taxonomy_consensus_processing_tests.py
    - run_variability_mock.sh
    - __init__.py
    - server
      - start_server.sh
      - README.txt
      - run_server_tests.py
    - sandbox
      - items_addtl_data_gene_mode.txt
      - mock_data_for_structure
        additional_layers_data.txt
        02.ini
        04.ini
        01.ini
        00_README
        STRUCTURES
        2.pdb
        3.pdb
        0.pdb
        4.pdb
        RAPTORXPROPERTY
        4.acc
        1.acc
        2.acc
        1.diso
        4.ss3
        4.diso
        1.ss8
        3.ss3
        2.diso
        0.diso
        0.ss8
        4.ss8
        3.ss8
        3.diso
        1.ss3
        3.acc
        2.ss8
        2.ss3
        0.acc
        0.ss3
        one_contig_five_genes.fa
        collection.txt
        proteins.fa
        03.ini
        05.ini
      - SAMPLE-02-RAW.bam
      - additional_view.txt
      - contigs.fa
      - samples-order.txt
      - items_additional_data.txt
      - example_files_for_centrifuge_taxonomy
        centrifuge_report.tsv
        centrifuge_hits.tsv
      - sample_contig_ids.txt
      - example_state.json
      - samples_to_exclude_for_mcg.txt
      - external_hmm_profile
        genes.txt
        noise_cutoff_terms.txt
        genes.hmm.gz
        reference.txt
        kind.txt
        target.txt
      - emapper_1.0.3_eggNOG_functions.txt
      - config_files_for_variability_testing
        02.ini
        04.ini
        01.ini
        default.json
        03.ini
        05.ini
      - example_items_order_file.txt
      - test_visualize_split_coverages
        run_tests.R
        Makefile
        test_files
        sample_data.txt
        split_cov.txt
        snv.txt
        sample_data_no_group.txt
        sample_data_no_color.txt
        test_visualize_split_coverages.R
      - samples-single-order-newick.txt
      - concoct_mini_test.txt
      - distant_positions_for_linkmers.txt
      - adjacent_positions_for_linkmers.txt
      - files_for_manual_interactive
        samples-order.txt
        profile.db
        additional_view_data.txt
        view_data.txt
        tree.txt
        bad_view_data.txt
        samples-information.txt
        fasta.fa
      - example_interpro_output.tsv.README
      - single_contig.fa
      - layers_addtl_data_gene_mode.txt
      - additional_view_data.txt
      - example_interpro_output.tsv
      - collection_for_blank_profile.txt
      - example_external_collections
        adhoc_collections.txt
        adhoc_colors.txt
      - data
        metagenomes
        human_gut
        IGD_SUBSET
        genomes
        archaea
        bacteria
        input_files
        external-genomes.txt
        bin-ids.txt
        internal-genomes.txt
        metagenomes.txt
      - smtp_config_sample.ini
      - workflows
        phylogenomics
        config.json
        s03-phylo.fa
        s02-phylo.fa
        s01-phylo.fa
        fasta.txt
        s04-phylo.fa
        s05-phylo.fa
        pangenomics
        config.json
        pan-config-with-phylogeny.json
        fasta.txt
        pan-config-with-phylogeny-using-hmms.json
        five-genomes-fasta.txt
        five_genomes_example
        GP01.fa.gz
        GP04.fa.gz
        GP02.fa.gz
        GP03.fa.gz
        GP05.fa.gz
        metagenomics
        samples.txt
        config-megahit-no-qc-all-against-all.json
        mock_ref_for_removal2.fa.gz
        MOCK-collection.txt
        three_samples_example
        G01-contigs.fa.gz
        G02-contigs.fa.gz
        collections.txt
        sample_01.krakenhll_tax
        mock_ref_for_removal.txt
        sample_03.krakenhll_tax
        samples-no-groups.txt
        config-references-mode.json
        config-idba_ud.json
        MOCK-collection-info.txt
        config-metaspades-no-qc-use-scaffolds.json
        fasta.txt
        sample_02.krakenhll_tax
        kraken.txt
        mock_ref_for_removal1.fa.gz
        config-idba_ud-no-qc.json
        config-megahit.json
        config-metaspades.json
        config-references-mode-no-qc-no-gzip-no-groups.json
        contigs
        fasta.txt
      - samples_to_include_for_mcg.txt
      - example_description.md
      - example_genes_of_interest.txt
      - anvi_server_files
        mock_project_directory_01
        dataFile
        profile.db
        treeFile
        samples.db
        fastaFile
      - samples-single-order-basic.txt
      - files_for_indel_testing
        small
        sample.ini
        contig.fa
        sample.json
        large
        sample.ini
        sample_T300del.ini
        00_README
        contig.fa
        contig_T300del.fa
      - example_files_for_inseq_tnseq
      - example_gene_functions_input_matrix.txt
      - sample_CONCOCT_bin_id.txt
      - example_files_for_external_binning_results
        external_binning_of_splits.txt
        example_bins_info_file.txt
        external_binning_of_contigs.txt
      - samples-information.txt
      - mock_data_for_pangenomics
        functions
        01-functions.txt
        02-functions.txt
        README.txt
        03-functions.txt
        external-genomes.txt
        03.fa
        default-state.json
        pfam
        aa_sequences_03.pfam
        aa_sequences_01.pfam
        aa_sequences_02.pfam
        aa_sequences_02.fa
        README.pfam
        aa_sequences_01.fa
        aa_sequences_03.fa
        example-gene-clusters-collection.txt
        02.fa
        01.fa
        group-information.txt
      - sample_gene_call_ids.txt
      - example_clustering_configuration.ini
      - mock_files_for_alons_classifier
        TEST.fa
        samples_to_exclude.txt
        TEST-COLLECTION.txt
        samples_to_include.txt
        default.json
        hmp0074-RAW.bam
      - concoct.txt
      - example_external_gene_calls.txt
      - example_files_for_kraken_hll_taxonomy
        SAMPLE-02.mpa
        SAMPLE-03.mpa
        SAMPLE-01.mpa
      - BAMs_SF
        contigs.fa
        SF02.bam
    - run_pangenome_tests.sh
    - run_structure_mock.sh
    - run_indel_test.sh
    - unit
      - utils
        test_files
        one_sequence.fasta
        five_sequences.fasta
        not_a_fasta.txt
        empty.fasta
        test_split_fasta.py
        __init__.py
      - test_Read_class.py
      - __init__.py
      - test_homogeneityindex.py
    - run_contigs_workflow_tests.sh
    - run_alons_classifier_tests.sh
  - errors.py
  - samplesops.py
  - learning.py
  - clustering.py
  - interactive.py
  - ttycolors.py
  - docs
    - images
      - svg
        icons.svg
      - png
        icons
    - artifacts
      - hmm-hits.md
      - hmm-source.md
      - external-gene-calls.md
      - ngrams.md
      - contigs-fasta.md
      - fasta.md
      - completion.md
      - contigs-db.md
      - external-genomes.md
      - metagenomes.md
      - variability-profile.md
      - kegg-db.md
      - kegg-functions.md
      - pan-db.md
      - 00_README.md
      - kegg-metabolism.md
      - internal-genomes.md
    - __init__.py
    - programs
      - anvi-setup-kegg-kofams.md
      - anvi-run-hmms.md
      - anvi-gen-genomes-storage.md
      - anvi-display-metabolism.md
      - anvi-gen-contigs-database.md
      - anvi-script-reformat-fasta.md
      - anvi-analyze-synteny.md
      - anvi-get-sequences-for-hmm-hits.md
      - anvi-run-kegg-kofams.md
      - 00_README.md
      - anvi-estimate-metabolism.md
  - summaryhtml.py
  - structureops.py
  - variability.py
  - sequence.py
  - scgdomainclassifier.py
  - completeness.py
  - programs.py
  - panops.py
  - genecalling.py
- .gitmodules
- CHANGELOG.md
- setup.py
- NEWS.md
- README.md
- bin
  - anvi-show-misc-data
  - anvi-upgrade
  - anvi-export-splits-and-coverages
  - anvi-compute-genome-similarity
  - anvi-oligotype-linkmers
  - anvi-run-scg-taxonomy
  - anvi-run-ncbi-cogs
  - anvi-run-pfams
  - anvi-export-collection
  - anvi-get-short-reads-from-bam
  - anvi-scan-trnas
  - anvi-setup-pfams
  - anvi-setup-pdb-database
  - anvi-init-bam
  - anvi-db-info
  - anvi-gen-fixation-index-matrix
  - anvi-show-collections-and-bins
  - anvi-estimate-genome-completeness
  - anvi-merge-bins
  - anvi-export-gene-calls
  - anvi-gen-variability-matrix
  - anvi-setup-ncbi-cogs
  - anvi-display-contigs-stats
  - anvi-compute-ani
  - anvi-compute-gene-cluster-homogeneity
  - anvi-get-codon-frequencies
  - anvi-get-short-reads-mapping-to-a-gene
  - anvi-import-taxonomy-for-layers
  - anvi-export-misc-data
  - anvi-delete-hmms
  - anvi-inspect
  - anvi-export-functions
  - anvi-matrix-to-newick
  - anvi-dereplicate-genomes
  - anvi-rename-bins
  - anvi-gen-structure-database
  - anvi-import-items-order
  - anvi-report-linkmers
  - anvi-push
  - anvi-gen-genomes-storage
  - anvi-export-contigs
  - anvi-gen-variability-profile
  - anvi-export-items-order
  - anvi-estimate-genome-taxonomy
  - anvi-gen-gene-consensus-sequences
  - anvi-analyze-synteny
  - anvi-mcg-classifier
  - anvi-export-structures
  - anvi-merge
  - anvi-run-hmms
  - anvi-refine
  - anvi-get-split-coverages
  - anvi-estimate-scg-taxonomy
  - anvi-export-gene-coverage-and-detection
  - anvi-delete-collection
  - anvi-get-sequences-for-gene-clusters
  - anvi-run-workflow
  - anvi-estimate-metabolism
  - anvi-delete-state
  - anvi-display-structure
  - anvi-gen-gene-level-stats-databases
  - anvi-import-state
  - anvi-delete-misc-data
  - anvi-compute-completeness
  - anvi-gen-contigs-database
  - anvi-get-sequences-for-hmm-hits
  - anvi-gen-network
  - anvi-search-functions
  - anvi-get-enriched-functions-per-pan-group
  - anvi-import-functions
  - anvi-display-metabolism
  - anvi-get-aa-counts
  - anvi-help
  - anvi-run-kegg-kofams
  - anvi-gen-phylogenomic-tree
  - anvi-profile
  - anvi-setup-scg-taxonomy
  - anvi-export-splits-taxonomy
  - anvi-cluster-contigs
  - anvi-split
  - anvi-migrate
  - anvi-update-structure-database
  - anvi-pan-genome
  - anvi-meta-pan-genome
  - anvi-get-sequences-for-gene-calls
  - anvi-import-collection
  - anvi-update-db-description
  - anvi-experimental-organization
  - anvi-interactive
  - anvi-gen-variability-network
  - anvi-display-pan
  - anvi-setup-kegg-kofams
  - anvi-import-taxonomy-for-genes
  - anvi-export-locus
  - anvi-import-misc-data
  - anvi-self-test
  - anvi-export-table
  - anvi-summarize
  - anvi-export-state
- requirements.txt
- sandbox
  - anvi-script-gen_stats_for_single_copy_genes.sh
  - anvi-script-gen_stats_for_single_copy_genes.py
  - anvi-script-FASTA-to-contigs-db
  - anvi-script-tabulate
  - anvi-script-gen-hmm-hits-matrix-across-genomes
  - anvi-script-get-short-reads-matching-something
  - anvi-script-variability-to-vcf
  - anvi-script-gen-pseudo-paired-reads-from-fastq
  - anvi-script-snvs-to-interactive
  - anvi-script-filter-fasta-by-blast
  - anvi-script-gen-distribution-of-genes-in-a-bin
  - anvi-script-gen-CPR-classifier
  - anvi-script-get-coverage-from-bam
  - anvi-script-estimate-genome-size
  - anvi-script-gen-scg-domain-classifier
  - anvi-script-predict-CPR-genomes
  - anvi-script-augustus-output-to-external-gene-calls
  - anvi-script-reformat-fasta
  - anvi-script-get-hmm-hits-per-gene-call
  - anvi-script-compute-ani-for-fasta
  - 00_README.txt
  - anvi-script-merge-collections
  - anvi-script-gen-programs-network
  - anvi-script-gen_stats_for_single_copy_genes.README
  - anvi-script-visualize-split-coverages
  - anvi-script-gen-programs-vignette
  - anvi-script-checkm-tree-to-interactive
  - anvi-script-calculate-pn-ps-ratio
  - anvi-script-get-collection-info
  - anvi-script-run-eggnog-mapper
  - anvi-script-process-genbank
  - anvi-script-gen_stats_for_single_copy_genes.R
  - anvi-script-run-functional-enrichment-stats
  - anvi-script-gen-short-reads
  - anvi-script-process-genbank-metadata
  - anvi-script-gen-help-pages
  - anvi-script-add-default-collection
  - anvi-script-transpose-matrix
- Dockerfile
- .gitignore
- LICENSE.txt
- .dockerignore
- MANIFEST.in
- AUTHORS.txt

# -*- coding: utf-8
# pylint: disable=line-too-long
"""Module to submit/track jobs for SUN Grid Engine"""

import os
import time
import glob
import random
import string
import subprocess

import anvio
import anvio.fastalib as u
import anvio.utils as utils
import anvio.filesnpaths as filesnpaths

from anvio.errors import ConfigError
from anvio.terminal import pretty_print as pp


__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


# Template for the shell script that is written next to every part and handed to
# `qsub`. `SGE.clusterize` fills it in with three keys: `log` (used for both the
# -o and the -e path), `identifier` (used as the -N job name), and `command`
# (the command line the job executes).
QSUB_SCRIPT = """#!/bin/sh
#$ -j y
#$ -o %(log)s
#$ -e %(log)s
#$ -N %(identifier)s
#$ -V

%(command)s"""


class Progress:
    """Bare-bones progress reporter: every update is printed on its own line."""

    def update(self, str):
        """Print the progress message `str` to standard output."""
        message = str
        print(message)


class Run:
    """Bare-bones run logger that prints `key: value` pairs."""

    def info(self, str_1, str_2):
        """Print `str_1` and `str_2` (both converted with `str()`) as 'str_1: str_2'."""
        label, value = str(str_1), str(str_2)
        print("%s: %s" % (label, value))


class SGE:
    """Send jobs to Sun Grid Engine and merge their partial results.

    This is a simple class that splits an input file into parts, submits one
    `qsub` job per part, polls `qstat` until none of those jobs is listed
    anymore, and finally concatenates the partial results. It hasn't been
    really tested well with different versions of SGE. An example usage is as
    follows:

        ----8<-----8<-----8<-----8<-----8<-----8<-----8<-----8<-----8<-----8<-----
            import sys
            import shutil
            from anvio.sge import SGE
            from anvio.errors import ConfigError

            sge = SGE()
            sge.check_sge_binaries()
            sge.input_file_path = ...
            sge.tmp_dir = ...
            sge.input_is_fasta = True
            sge.merged_results_file_path = ...
            sge.binary = ... (full path to the binary file)
            sge.command = 'perl %(binary)s %(part)s'
            sge.wild_card_for_partial_results = "results.01.phymm*part-*.txt"

            try:
                sge._run()
            except ConfigError as e:
                print(e)
                sys.exit(-1)

            shutil.rmtree(sge.tmp_dir)

            # at this point the merged results file must be found here:
            print('%s' % sge.merged_results_file_path)
        ----8<-----8<-----8<-----8<-----8<-----8<-----8<-----8<-----8<-----8<-----

    Here are the critical variables to set before calling `sge._run`:

        * `sge.tmp_dir` is the directory to store all parts.
        * `sge.command` is the command template that has to have %(binary)s and
          %(part)s; here is an example:

                sge.command = 'perl %(binary)s %(part)s'

        * `sge.wild_card_for_partial_results`. Everything is split into parts, sent
          to the cluster, and followed until all processes are done. Output files
          are expected to be found in the `sge.tmp_dir` directory, and it is the
          user's responsibility to format `sge.command` in a proper way to make
          sure that is happening. This property is a wildcard to look for in
          `sge.tmp_dir` to merge partial results into one results file. Here is an
          example:

                sge.wild_card_for_partial_results = "results.01.phymm*part-*.txt"

        * `sge.merged_results_file_path` is the file all partial results will be
          merged into for downstream analyses.
    """

    # qstat state codes, grouped by the category they are counted under in
    # `parse_qstat_output`.
    QSTAT_STATE_CODES = {'Pending': ['qw', 'hqw', 'hRwq'],
                         'Running': ['r', 't', 'Rr', 'Rt'],
                         'Suspended': ['s', 'ts', 'S', 'tS', 'T', 'tT'],
                         'Error': ['Eqw', 'Ehqw', 'EhRqw'],
                         'Deleted': ['dr', 'dt', 'dRr', 'dRt', 'ds', 'dS', 'dT', 'dRs', 'dRS', 'dRT']}


    def __init__(self):
        """Set every configurable attribute to its default (see the class docstring)."""

        self.input_file_path = None
        self.merged_results_file_path = None
        self.num_entries_per_file = 10
        self.tmp_dir = None
        self.wild_card_for_partial_results = None

        self.progress = Progress()
        self.run = Run()

        self.input_is_fasta = True

        self.binary = None
        self.command = None


    def _run(self):
        """Split the input, submit and follow the jobs, then merge partial results.

        Raises ConfigError if `binary`, `command`, or `tmp_dir` is not set. The
        partial results are merged only if `wild_card_for_partial_results` is set.
        """
        self.check_sge_binaries()

        if not self.binary:
            raise ConfigError('A binary has to be declared.')
        if not self.command:
            raise ConfigError('SGE module cannot run without a command.')
        if not self.tmp_dir:
            raise ConfigError('SGE module needs a tmp dir.')

        filesnpaths.is_file_exists(self.input_file_path)
        filesnpaths.is_output_file_writable(self.merged_results_file_path)

        self.run.info('temp_directory', self.tmp_dir)

        parts = self.split_input_file()

        old_workdir = os.getcwd()
        # dirname() is '' for a tmp_dir without a directory component, and
        # os.chdir('') raises; stay in the current directory in that case.
        os.chdir(os.path.dirname(self.tmp_dir) or '.')
        try:
            self.clusterize(parts)

            if self.wild_card_for_partial_results:
                self.merge_partial_results()
        finally:
            # restore the caller's working directory even if a step above failed
            os.chdir(old_workdir)


    def merge_partial_results(self):
        """Concatenate every file in `tmp_dir` that matches the wildcard into the merged results file.

        Raises ConfigError if the wildcard matches no file.
        """
        self.progress.update('Partial results file are being concatenated ...')
        files_to_concat = glob.glob(os.path.join(self.tmp_dir, self.wild_card_for_partial_results))
        if not files_to_concat:
            raise ConfigError("Wild card '%s' didn't return any files to concatenate." % self.wild_card_for_partial_results)

        utils.concatenate_files(self.merged_results_file_path, files_to_concat)


    def check_sge_binaries(self):
        """Check for the `qsub` and `qstat` programs via `filesnpaths.is_program_exists`."""
        filesnpaths.is_program_exists('qsub')
        filesnpaths.is_program_exists('qstat')


    def split_input_file(self):
        """Write the input FASTA into `part-XXXXXXXX.fa` files under `tmp_dir`.

        Each part receives `num_entries_per_file` entries (the last one may have
        fewer). Returns the list of part file paths in the order they were
        created; the list is empty if `input_is_fasta` is false.

        Raises ConfigError if `num_entries_per_file` is smaller than 1.
        """
        parts = []

        if not self.input_is_fasta:
            return parts

        if self.num_entries_per_file < 1:
            raise ConfigError("The number of entries per file must be at least 1 (it is currently '%s')."
                              % self.num_entries_per_file)

        next_part = 1
        part_obj = None
        fasta = u.SequenceSource(self.input_file_path)

        try:
            while next(fasta):
                # assumes `fasta.pos` is the 1-based index of the current entry, so
                # that the very first entry opens the first part -- TODO confirm
                if (fasta.pos - 1) % self.num_entries_per_file == 0:
                    self.progress.update('Creating part: ~ %s' % (pp(next_part)))

                    if part_obj:
                        part_obj.close()

                    file_path = os.path.join(self.tmp_dir, 'part-%08d.fa' % next_part)
                    parts.append(file_path)
                    next_part += 1
                    part_obj = open(file_path, 'w')

                part_obj.write('>%s\n' % fasta.id)
                part_obj.write('%s\n' % fasta.seq)
        finally:
            # close the last part, also when reading or writing failed midway
            if part_obj:
                part_obj.close()

        return parts


    def clusterize(self, parts):
        """Submit one job per part and block until `qstat` lists none of them.

        For every path in `parts` a `<part>.sh` script is written from
        QSUB_SCRIPT (logging to `<part>.log`) and submitted with `qsub`. All jobs
        share one random name, which is what `get_qstat_info` looks for. `qstat`
        is polled every five seconds. Returns True once no job is listed.
        """
        # create a random identifier of 10 uppercase letters for cluster jobs:
        identifier = ''.join(random.choice(string.ascii_uppercase) for _ in range(10))

        for part in parts:
            command = self.command % {'binary': self.binary, 'part': part}

            # create sh file
            shell_script = part + '.sh'
            with open(shell_script, 'w') as script:
                script.write(QSUB_SCRIPT % {'log': part + '.log',
                                            'identifier': identifier,
                                            'command': command})

            # submit script to cluster
            # NOTE(review): confirm that `utils.run_command` accepts a single
            # positional argument in this version of anvi'o.
            utils.run_command('qsub %s' % shell_script)

        # NOTE(review): jobs counted under 'Error' or 'Suspended' keep this loop
        # alive as well, since every category is part of the total -- confirm
        # that waiting on them indefinitely is intended.
        while True:
            qstat_info = self.get_qstat_info(identifier)
            total_processes = sum(qstat_info.values())
            if total_processes == 0:
                break

            self.progress.update('Qstat Info :: Total Jobs: %s, %s' % (pp(total_processes),
                       ', '.join(['%s: %s' % (x, pp(qstat_info[x])) for x in qstat_info])))

            time.sleep(5)

        return True


    def get_qstat_info(self, job_identifier):
        """Run `qstat` and count the jobs named `job_identifier` per state category.

        Returns a dictionary with the keys 'Pending', 'Running', 'Suspended',
        'Error', and 'Deleted' (see `parse_qstat_output`).

        Raises ConfigError if `qstat` cannot be started, or if a matching job is
        in a state that is not listed in QSTAT_STATE_CODES.
        """
        try:
            # read the output as text: job names and state codes are compared
            # against `str` values
            proc = subprocess.Popen(['qstat'], stdout=subprocess.PIPE, universal_newlines=True)
        except OSError as e:
            raise ConfigError("qstat command was failed for the following reason: '%s'" % (e)) from e

        # communicate() reads stdout until EOF and waits for the child to exit
        qstat_output, _ = proc.communicate()

        return self.parse_qstat_output(qstat_output, job_identifier)


    def parse_qstat_output(self, qstat_output, job_identifier):
        """Count the jobs named `job_identifier` in `qstat_output` per state category.

        `qstat_output` is the complete output of `qstat` as `str` (or `bytes`,
        which are decoded first). The first two lines are skipped (presumably the
        column header and its separator line), and so are lines with fewer than
        five whitespace-separated fields. In every other line the third field is
        taken as the job name and the fifth as its state code.

        Returns a dictionary that maps each key of QSTAT_STATE_CODES to the
        number of matching jobs in that category.

        Raises ConfigError if a matching job has an unknown state code.
        """
        if isinstance(qstat_output, bytes):
            qstat_output = qstat_output.decode()

        info_dict = dict.fromkeys(self.QSTAT_STATE_CODES, 0)

        for line in qstat_output.splitlines()[2:]:
            fields = line.split()
            if len(fields) < 5:
                # blank or otherwise incomplete line, nothing to count
                continue

            name, state = fields[2], fields[4]
            if name != job_identifier:
                continue

            for category, codes in self.QSTAT_STATE_CODES.items():
                if state in codes:
                    info_dict[category] += 1
                    break
            else:
                raise ConfigError("Unknown state for qstat: '%s' (known states: '%s')"\
                         % (state, ', '.join(list(info_dict.keys()))))

        return info_dict