package owltools.ncbi; import static org.junit.Assert.*; import java.util.Arrays; import java.util.Collections; import java.util.Set; import java.util.List; import java.util.ArrayList; import org.junit.Test; import org.semanticweb.owlapi.formats.RDFXMLDocumentFormat; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLClass; import org.semanticweb.owlapi.model.OWLAnnotationProperty; import org.semanticweb.owlapi.model.OWLClassAxiom; import org.semanticweb.owlapi.model.OWLAnnotationAxiom; import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; import org.semanticweb.owlapi.model.AxiomType; import org.semanticweb.owlapi.model.parameters.Imports; import owltools.ncbi.NCBIOWL; import owltools.ncbi.NCBI2OWL; /** * Tests for {@link NCBI2OWL}. * */ public class NCBI2OWLTest { private int oio = 8; // OboInOwl annotation properties private int props = 2; // other annotation properties: IAO_0000115 definition, has_rank private int ranks = 29; // specified ranks private int types = 16; // specified synonym types private int baseTaxa = 0; private int baseAnnotations = ranks // rdfs:labels for each rank + ranks // oio:hasOBONamespace for each rank + 4 // annotations on has_rank + 1 // rdfs:comment on taxonomic_rank + types // rdfs:labels for each type + types - 1 // oio:hasScope for each type except synonym_type_property + oio // OIO rdfs:labels + 1 // rdfs:label on synonym_type_property ; private int sampleTaxa = 12; // taxa in sample.dat private int sampleAnnotations = baseAnnotations + 9 // RANK for most of the taxa + sampleTaxa // SCIENTIFIC NAME for each taxon + sampleTaxa // GD ID for each taxon + sampleTaxa // oio:hasOBONamespace for each taxon + 3 // GENBANK COMMON NAME + 9 // SYNONYM + 4 // IN-PART + 3 // BLAST NAME + 2 // exact synonym annotations for Actinobacteria ; /** * */ @Test public void testCreate() { assertEquals("Count ranks", ranks, NCBIOWL.ranks.size() + 1); // plus taxonomy_rank assertEquals("Count synonym types", types, NCBIOWL.synonymTypes.size() + 1); // plus synonym_type_property try { OWLOntology ontology = NCBIOWL.createOWLOntology(); // Uncomment these lines to save the file. //String outputPath = "create.owl"; //File outputFile = new File(outputPath); //IRI outputIRI = IRI.create(outputFile); //ontology.getOWLOntologyManager().saveOntology(ontology, // outputIRI); try { testOntology(ontology, baseTaxa, baseAnnotations); } catch (Exception e) { System.out.println("Exception in testCreate testOntology: " + e.toString()); } finally { ontology.getOWLOntologyManager().removeOntology(ontology); } } catch (Exception e) { System.out.println("Exception in testCreate: " + e.toString()); } } /** * Convert a sample.dat file and then check the ontology, one of * the classes, one of the ranks, and one of the annotation properties. */ @Test public void testConvert() { String inputPath = "src/test/resources/sample.dat"; String outputPath = "sample.owl"; try { OWLOntology ontology = NCBI2OWL.convertToOWL(inputPath, null); // Uncomment these lines to save the file and axioms //String outputPath = "create.owl"; //File outputFile = new File(outputPath); //IRI outputIRI = IRI.create(outputFile); //ontology.getOWLOntologyManager().saveOntology(ontology, // outputIRI); //NCBI2OWL.printAxioms(ontology, "sample.txt"); try { testOntology(ontology, sampleTaxa, sampleAnnotations); testBacteria(ontology); testActinobacteria(ontology); testSpecies(ontology); testExactSynonym(ontology); } finally { ontology.getOWLOntologyManager().removeOntology(ontology); } } catch (Exception e) { System.out.println("Exception in testConvert: " + e.toString()); } } private void testOntology(OWLOntology ontology, int taxa, int annotations) { int declarations = taxa // taxon class declarations + oio + props // OIO and other annotation properties + types + ranks; assertEquals("Count declarations", declarations, ontology.getAxiomCount(AxiomType.DECLARATION)); int taxaExceptRoot = 0; if (taxa > 0) { taxaExceptRoot = taxa - 1; } int subClasses = taxaExceptRoot // taxa except root + ranks - 1; // ranks except taxonomy_rank assertEquals("Count subClass assertions", subClasses, ontology.getAxiomCount(AxiomType.SUBCLASS_OF)); int subAnnotationProperties = types - 1; // taxa except synonym_type_property assertEquals("Count subClass assertions", subAnnotationProperties, ontology.getAxiomCount( AxiomType.SUB_ANNOTATION_PROPERTY_OF)); assertEquals("Count annotation assertions", annotations, ontology.getAxiomCount(AxiomType.ANNOTATION_ASSERTION)); int axioms = declarations + subClasses + subAnnotationProperties + annotations; assertEquals("Count all axioms", axioms, ontology.getAxiomCount()); } private void testBacteria(OWLOntology ontology) { String curie = "ncbi:2"; IRI iri = OWLConverter.format.getIRI(curie); OWLDataFactory df = ontology.getOWLOntologyManager(). getOWLDataFactory(); OWLClass taxon = df.getOWLClass(iri); assertTrue("Bacteria class in signature", ontology.containsClassInSignature(iri)); // Check axioms Set<OWLClassAxiom> axioms = ontology.getAxioms(taxon, Imports.EXCLUDED); assertEquals("Count class axioms for Bacteria", 1, axioms.size()); assertEquals("SubClassOf(<http://purl.obolibrary.org/obo/NCBITaxon_2> <http://purl.obolibrary.org/obo/NCBITaxon_131567>)", axioms.toArray()[0].toString()); // Check annotations List<String> values = new ArrayList<String>(); values.add(expandAnnotation(curie, "ncbitaxon:has_rank", OWLConverter.format.getIRI("ncbi:superkingdom"))); values.add(expandAnnotation(curie, "oio:hasOBONamespace", "ncbi_taxonomy")); values.add(expandAnnotation(curie, "oio:hasDbXref", "GC_ID:11")); values.add(expandLabel(curie, "rdfs:label", "Bacteria")); values.add(expandSynonym(curie, "ncbitaxon:genbank_common_name", "oio:hasExactSynonym", "eubacteria")); values.add(expandSynonym(curie, "ncbitaxon:synonym", "oio:hasRelatedSynonym", "not Bacteria Haeckel 1894")); values.add(expandSynonym(curie, "ncbitaxon:in_part", "oio:hasRelatedSynonym", "Prokaryota")); values.add(expandSynonym(curie, "ncbitaxon:in_part", "oio:hasRelatedSynonym", "Monera")); values.add(expandSynonym(curie, "ncbitaxon:in_part", "oio:hasRelatedSynonym", "Procaryotae")); values.add(expandSynonym(curie, "ncbitaxon:in_part", "oio:hasRelatedSynonym", "Prokaryotae")); values.add(expandSynonym(curie, "ncbitaxon:blast_name", "oio:hasRelatedSynonym", "eubacteria")); Set<OWLAnnotationAssertionAxiom> annotations = ontology.getAnnotationAssertionAxioms(iri); assertEquals("Count annotations for Bacteria", values.size(), annotations.size()); checkAnnotations(annotations, values); } private void testActinobacteria(OWLOntology ontology) { String curie = "ncbi:201174"; IRI iri = OWLConverter.format.getIRI(curie); OWLDataFactory df = ontology.getOWLOntologyManager(). getOWLDataFactory(); OWLClass taxon = df.getOWLClass(iri); assertTrue("Actinobacteria class in signature", ontology.containsClassInSignature(iri)); // Check axioms Set<OWLClassAxiom> axioms = ontology.getAxioms(taxon, Imports.EXCLUDED); assertEquals("Count class axioms for Actinobacteria", 1, axioms.size()); assertEquals("SubClassOf(<http://purl.obolibrary.org/obo/NCBITaxon_201174> <http://purl.obolibrary.org/obo/NCBITaxon_2>)", axioms.toArray()[0].toString()); // Check annotations List<String> values = new ArrayList<String>(); values.add(expandAnnotation(curie, "ncbitaxon:has_rank", OWLConverter.format.getIRI("ncbi:phylum"))); values.add(expandAnnotation(curie, "oio:hasOBONamespace", "ncbi_taxonomy")); values.add(expandAnnotation(curie, "oio:hasDbXref", "GC_ID:11")); values.add(expandLabel(curie, "rdfs:label", "Actinobacteria [NCBITaxon:201174]")); values.add(expandSynonym(curie, "ncbitaxon:scientific_name", "oio:hasExactSynonym", "Actinobacteria")); values.add(expandSynonym(curie, "ncbitaxon:synonym", "oio:hasRelatedSynonym", "'Actinobacteria'")); values.add(expandSynonym(curie, "ncbitaxon:synonym", "oio:hasRelatedSynonym", "not Actinobacteria Cavalier-Smith 2002")); values.add(expandSynonym(curie, "ncbitaxon:blast_name", "oio:hasRelatedSynonym", "actinobacteria")); Set<OWLAnnotationAssertionAxiom> annotations = ontology.getAnnotationAssertionAxioms(iri); assertEquals("Count annotations for Actinobacteria", values.size(), annotations.size()); checkAnnotations(annotations, values); } private String expandLabel(String subject, String property, String value) { RDFXMLDocumentFormat format = OWLConverter.format; // TODO: Why is "Annotation" always doubled? return "AnnotationAssertion(" + property + " <" + format.getIRI(subject) + "> \"" + value +"\"^^xsd:string)"; } private String expandAnnotation(String subject, String property, String value) { RDFXMLDocumentFormat format = OWLConverter.format; // TODO: Why is "Annotation" always doubled? return "AnnotationAssertion(<" + format.getIRI(property) + "> <" + format.getIRI(subject) + "> \"" + value +"\"^^xsd:string)"; } private String expandAnnotation(String subject, String property, IRI value) { RDFXMLDocumentFormat format = OWLConverter.format; // TODO: Why is "Annotation" always doubled? return "AnnotationAssertion(<" + format.getIRI(property) + "> <" + format.getIRI(subject) + "> <" + value.toString() + ">)"; } private String expandSynonym(String subject, String type, String property, String value) { RDFXMLDocumentFormat format = OWLConverter.format; return "AnnotationAssertion(Annotation(<" + format.getIRI("oio:hasSynonymType").toString() + "> <" + format.getIRI(type).toString() + ">) <" + format.getIRI(property) + "> <" + format.getIRI(subject) + "> \"" + value +"\"^^xsd:string)"; } private void testSpecies(OWLOntology ontology) { IRI iri = IRI.create("http://purl.obolibrary.org/obo/NCBITaxon_species"); OWLDataFactory df = ontology.getOWLOntologyManager(). getOWLDataFactory(); OWLClass taxon = df.getOWLClass(iri); assertTrue("Species class in signature", ontology.containsClassInSignature(iri)); // Check axioms Set<OWLClassAxiom> axioms = ontology.getAxioms(taxon, Imports.EXCLUDED); assertEquals("Count class axioms", 1, axioms.size()); assertEquals("SubClassOf(<http://purl.obolibrary.org/obo/NCBITaxon_species> <http://purl.obolibrary.org/obo/NCBITaxon#_taxonomic_rank>)", axioms.toArray()[0].toString()); // Check annotations List<String> values = new ArrayList<String>(); values.add("AnnotationAssertion(<http://www.geneontology.org/formats/oboInOwl#hasOBONamespace> <http://purl.obolibrary.org/obo/NCBITaxon_species> \"ncbi_taxonomy\"^^xsd:string)"); values.add("AnnotationAssertion(rdfs:label <http://purl.obolibrary.org/obo/NCBITaxon_species> \"species\"^^xsd:string)"); Set<OWLAnnotationAssertionAxiom> annotations = ontology.getAnnotationAssertionAxioms(iri); assertEquals("Count annotations for Species", 2, annotations.size()); checkAnnotations(annotations, values); } private void testExactSynonym(OWLOntology ontology) { IRI iri = IRI.create("http://www.geneontology.org/formats/oboInOwl#hasExactSynonym"); OWLDataFactory df = ontology.getOWLOntologyManager(). getOWLDataFactory(); OWLAnnotationProperty property = df.getOWLAnnotationProperty(iri); assertTrue("Exact Synonym property in signature", ontology.containsAnnotationPropertyInSignature(iri)); // Check axioms Set<OWLAnnotationAxiom> axioms = ontology.getAxioms(property, Imports.EXCLUDED); assertEquals("Count class axioms", 0, axioms.size()); // Check annotations List<String> values = new ArrayList<String>(); values.add("AnnotationAssertion(rdfs:label <http://www.geneontology.org/formats/oboInOwl#hasExactSynonym> \"has_exact_synonym\"^^xsd:string)"); Set<OWLAnnotationAssertionAxiom> annotations = ontology.getAnnotationAssertionAxioms(iri); assertEquals("Count annotations for Exact", 1, annotations.size()); checkAnnotations(annotations, values); } private void checkAnnotations(Set<OWLAnnotationAssertionAxiom> axioms, List<String> values) { List<String> results = new ArrayList<String>(); for (OWLAnnotationAssertionAxiom axiom : axioms) { String string = axiom.toString(); results.add(string); } assertEquals("Compare sizes", values.size(), results.size()); java.util.Collections.sort(values); java.util.Collections.sort(results); for (int i=0; i < results.size(); i++) { assertEquals("Check Annotation " + i, values.get(i), results.get(i)); } } /** * Test parsing of various well-formed and malformed lines. */ @Test public void testParseLine() { testGoodLine("ID : 2", "id", "2"); testGoodLine("PARENT ID : 131567", "parent id", "131567"); testGoodLine("PARENT ID: 131567", "parent id", "131567"); testGoodLine("PARENT ID:131567", "parent id", "131567"); testGoodLine("RANK : superkingdom", "rank", "superkingdom"); testGoodLine("GC ID : 11", "gc id", "11"); testGoodLine("SCIENTIFIC NAME: Bacteria", "scientific name", "Bacteria"); testGoodLine("GENBANK COMMON NAME : eubacteria", "genbank common name", "eubacteria"); testGoodLine("SYNONYM : not Bacteria Haeckel 1894", "synonym", "not Bacteria Haeckel 1894"); testGoodLine("IN-PART: Prokaryota ", "in-part", "Prokaryota"); testGoodLine("BLAST NAME \t\t : eubacteria\t", "blast name", "eubacteria"); // Turn the logger off to test bad lines. org.apache.log4j.Level level = NCBI2OWL.logger.getLevel(); NCBI2OWL.logger.setLevel(org.apache.log4j.Level.OFF); testBadLine(""); testBadLine("ID"); testBadLine("ID ; 1"); testBadLine(": 1"); testBadLine(" : 1"); testBadLine("ID: "); testBadLine("ID:"); // Restore the logger level. NCBI2OWL.logger.setLevel(level); } private void testGoodLine(String line, String key, String value) { String[] result = NCBI2OWL.parseLine(line, 0); assertEquals("Match key: " + line, key, result[0]); assertEquals("Match value: " + line, value, result[1]); } private void testBadLine(String line) { assertNull("Bad line: " + line, NCBI2OWL.parseLine(line, 0)); } /** * Test the expected return values for a given set of lines in the dmp format. */ @Test public void testSplitDmpLine() { assertSplitDmpLine("", Collections.<String>emptyList()); // empty line return empty list assertSplitDmpLine("|", Collections.<String>singletonList(null)); assertSplitDmpLine("5658 | Leishmania | Leishmania <genus> | scientific name |", Arrays.asList("5658", "Leishmania", "Leishmania <genus>", "scientific name")); assertSplitDmpLine("5659 | Leishmania (Leishmania) amazonensis | | synonym |", Arrays.asList("5659", "Leishmania (Leishmania) amazonensis", null, "synonym")); } private void assertSplitDmpLine(String line, List<String> expected) { List<String> split = NCBI2OWL.splitDmpLine(line); assertArrayEquals(expected.toArray(new String[expected.size()]), split.toArray(new String[split.size()])); } /** * Test the expected return values for splitting a whitespace separated list of ids. */ @Test public void testSplitTaxonIds() { assertSplitTaxonIds("", Collections.<String>emptyList()); assertSplitTaxonIds(" ", Collections.<String>emptyList()); assertSplitTaxonIds("555", Collections.singletonList("555")); assertSplitTaxonIds("555 556", Arrays.asList("555","556")); assertSplitTaxonIds("555 556", Arrays.asList("555","556")); assertSplitTaxonIds("555 556 557 558 559", Arrays.asList("555","556","557","558","559")); } private void assertSplitTaxonIds(String line, List<String> expected) { List<String> split = NCBI2OWL.splitTaxonList(line); assertArrayEquals(expected.toArray(new String[expected.size()]), split.toArray(new String[split.size()])); } }