source: branches/intermine_0_92/bio/sources/example-sources/kegg-example/main/src/org/intermine/bio/dataconversion/KeggExampleConverter.java @ 20177

Revision 20177, 6.0 KB checked in by julie, 2 years ago (diff)

fix constructor

Line 
1package org.intermine.bio.dataconversion;
2
3/*
4 * Copyright (C) 2002-2009 FlyMine
5 *
6 * This code may be freely distributed and modified under the
7 * terms of the GNU Lesser General Public Licence.  This should
8 * be distributed with the code.  See the LICENSE file for more
9 * information or http://www.gnu.org/copyleft/lesser.html.
10 *
11 */
12
13import java.io.File;
14import java.io.IOException;
15import java.io.Reader;
16import java.util.HashMap;
17import java.util.Iterator;
18
19import org.apache.commons.lang.StringUtils;
20import org.apache.log4j.Logger;
21import org.intermine.dataconversion.ItemWriter;
22import org.intermine.metadata.Model;
23import org.intermine.objectstore.ObjectStoreException;
24import org.intermine.util.FormattedTextParser;
25import org.intermine.xml.full.Item;
26
27
28/**
29 * DataConverter to load Kegg Pathways and link them to Genes
30 *
31 * @author Richard Smith
32 */
33public class KeggExampleConverter extends BioFileConverter
34{
35    protected static final Logger LOG = Logger.getLogger(KeggExampleConverter.class);
36
37    protected HashMap<String, Item> pathwayMap = new HashMap<String, Item>();
38    private String taxonId = null;
39    private Item organism = null;
40
41    /**
42     * Constructor
43     * @param writer the ItemWriter used to handle the resultant items
44     * @param model the Model
45     */
46    public KeggExampleConverter(ItemWriter writer, Model model) {
47        super(writer, model, "GenomeNet", "KEGG PATHWAY");
48    }
49
50    /**
51     * Set the taxon id to process.
52     * @param taxonId the id
53     */
54    public void setTaxonId(String taxonId) {
55        this.taxonId = taxonId;
56    }
57
58    /**
59     * Called for each file found by ant.
60     *
61     * {@inheritDoc}
62     */
63    public void process(Reader reader) throws Exception {
64        if (StringUtils.isEmpty(taxonId)) {
65            throw new IllegalArgumentException("No taxonId provided: " + taxonId);
66        }
67
68        // There are two files:
69        //              map_title.tab - pathway ids and their names
70        //      xxx_gene_map.tab - genes and the pathways they are involved in
71        // The following code works out which file we are reading and calls the corresponding method
72        File currentFile = getCurrentFile();
73
74        if (currentFile.getName().equals("map_title.tab")) {
75            processMapTitleFile(reader);
76        } else if (currentFile.getName().endsWith("gene_map.tab")) {
77            processGeneMapFile(reader);
78        } else {
79            throw new IllegalArgumentException("Unexpected file: " + currentFile.getName());
80        }
81    }
82
83
84    /**
85     * Process all rows of the map_title.tab file
86     * @param reader a reader for the map_title.tab file
87     * @throws IOException
88     * @throws ObjectStoreException
89     */
90    private void processMapTitleFile(Reader reader) throws IOException, ObjectStoreException {
91        Iterator lineIter = FormattedTextParser.parseTabDelimitedReader(reader);
92
93        // this file has data of the format:
94        // pathway id | pathway name
95        while (lineIter.hasNext()) {
96            // line is a string array with the one element for each tab separated value
97            // on the next line of the file
98            String[] line = (String[]) lineIter.next();
99
100            String pathwayId = line [0];
101            String pathwayName = line[1];
102
103            // getPathway will create an Item or fetch it from a map if seen before
104            Item pathway = getPathway(pathwayId);
105            pathway.setAttribute("name", pathwayName);
106
107            // once we have set the pathway name that is all the information needed so we can store
108            store(pathway);
109        }
110    }
111
112    /**
113     * Process all rows of the xxx_gene_map.tab file
114     * @param reader a reader for the xxx_gene_map.tab file
115     * @throws IOException
116     * @throws ObjectStoreException
117     */
118    private void processGeneMapFile(Reader reader) throws IOException, ObjectStoreException {
119        // this file has data of the format:
120        // gene id | pathway ids (space separated)
121
122        Iterator lineIter = FormattedTextParser.parseTabDelimitedReader(reader);
123
124        while (lineIter.hasNext()) {
125            // line is a string array with the one element for each tab separated value
126            // on the next line of the file
127            String[] line = (String[]) lineIter.next();
128
129            String geneId = line[0];
130
131            // create a gene with this id as primaryIdentifier
132            Item gene = createItem("Gene");
133            gene.setAttribute("primaryIdentifier", geneId);
134            gene.setReference("organism", getOrganism());
135
136            // split the space separated list of pathway ids
137            String[] pathwayIds = line[1].split(" ");
138
139            // add each pathway to the Gene.pathways collection
140            for (String pathwayId : pathwayIds) {
141                // getPathway() will create a new pathway or fetch it from a map if already seen
142                Item pathway = getPathway(pathwayId);
143                gene.addToCollection("pathways", pathway);
144            }
145
146            // we have finished with this gene now so can store it
147            store(gene);
148        }
149    }
150
151    /**
152     * Create a new pathway Item or fetch from a map if it has been seen before
153     * @param pathwayId the id of a KEGG pathway to look up
154     * @return an Item representing the pathway
155     */
156    private Item getPathway(String pathwayId) {
157        Item pathway = pathwayMap.get(pathwayId);
158        if (pathway == null) {
159            pathway = createItem("Pathway");
160            pathway.setAttribute("identifier", pathwayId);
161            pathwayMap.put(pathwayId, pathway);
162        }
163        return pathway;
164    }
165
166    /**
167     * Get an Item representing an organism, create and store it if called for the first time
168     * @return an Item representing the organism
169     * @throws ObjectStoreException
170     */
171    private Item getOrganism() throws ObjectStoreException {
172        if (organism == null) {
173            organism = createItem("Organism");
174            organism.setAttribute("taxonId", taxonId);
175            store(organism);
176        }
177        return organism;
178    }
179}
Note: See TracBrowser for help on using the repository browser.