diff --git a/doc/src/geckomat/get_enzyme_data/loadDatabases.html b/doc/src/geckomat/get_enzyme_data/loadDatabases.html
index 69c4a25f4..80e12d51a 100644
--- a/doc/src/geckomat/get_enzyme_data/loadDatabases.html
+++ b/doc/src/geckomat/get_enzyme_data/loadDatabases.html
@@ -141,133 +141,142 @@
SOURCE CODE else
0080 databases.uniprot = [];
0081 end
-0082 end
-0083
-0084
-0085 if any(strcmp(selectDatabase,{'kegg','both'}))
-0086 keggPath = fullfile(filePath,'kegg.tsv');
-0087 if ~exist(keggPath,'file')
-0088 if isempty(kegg.ID)
-0089 printOrange('WARNING: No kegg.ID is specified, unable to download KEGG DB.\n')
-0090 else
-0091 downloadKEGG(kegg.ID,keggPath,kegg.geneID);
-0092 end
-0093 end
-0094 if exist(keggPath,'file')
-0095 fid = fopen(keggPath,'r');
-0096 fileContent = textscan(fid,'%q %q %q %q %q %q %q','Delimiter',',','HeaderLines',0);
-0097 fclose(fid);
-0098 databases.kegg.uniprot = fileContent{1};
-0099 databases.kegg.genes = fileContent{2};
-0100 databases.kegg.keggGene = fileContent{3};
-0101 databases.kegg.eccodes = fileContent{4};
-0102 databases.kegg.MW = str2double(fileContent{5});
-0103 databases.kegg.pathway = fileContent{6};
-0104 databases.kegg.seq = fileContent{7};
-0105 else
-0106 databases.kegg = [];
-0107 end
-0108 end
-0109 end
-0110
-0111 function downloadKEGG(keggID, filePath, keggGeneID)
-0112
-0113 progressbar(['Downloading KEGG data for organism code ' keggID])
-0114 webOptions = weboptions('Timeout',30);
-0115 try
-0116 gene_list = webread(['http://rest.kegg.jp/list/' keggID],webOptions);
-0117 catch ME
-0118 switch ME.identifier
-0119 case 'MATLAB:webservices:HTTP400StatusCodeError'
-0120 error(['Unable to download data form KEGG with a potentially invalid ID: ' keggID ])
-0121 end
-0122 end
-0123 gene_list = regexpi(gene_list, '[^\n]+','match')';
-0124 gene_id = regexpi(gene_list,['(?<=' keggID ':)\S+'],'match');
-0125
-0126
-0127 genesPerQuery = 10;
-0128 queries = ceil(numel(gene_id)/genesPerQuery);
-0129 keggData = cell(numel(gene_id),1);
-0130 for i = 1:queries
-0131
-0132 firstIdx = i*genesPerQuery-(genesPerQuery-1);
-0133 lastIdx = i*genesPerQuery;
-0134 if lastIdx > numel(gene_id)
-0135 lastIdx = numel(gene_id);
-0136 end
-0137 url = ['http://rest.kegg.jp/get/' keggID ':' strjoin([gene_id{firstIdx:lastIdx}],['+' keggID ':'])];
-0138
-0139 retry = true;
-0140 while retry
-0141 try
-0142 retry = false;
-0143 out = webread(url,webOptions);
-0144 catch
-0145 retry = true;
-0146 end
-0147 end
-0148 outSplit = strsplit(out,['///' 10]);
-0149 if numel(outSplit) < lastIdx-firstIdx+2
-0150 error('KEGG returns less genes per query')
-0151 end
-0152 keggData(firstIdx:lastIdx) = outSplit(1:end-1);
-0153 progressbar(i/queries)
-0154 end
-0155
-0156
-0157 sequence = regexprep(keggData,'.*AASEQ\s+\d+\s+([A-Z\s])+?\s+NTSEQ.*','$1');
-0158
-0159 noProt = startsWith(sequence,'ENTRY ');
-0160 uni = regexprep(keggData,'.*UniProt: (\S+?)\s.*','$1');
-0161 noUni = startsWith(uni,'ENTRY ');
-0162 uni(noProt | noUni) = [];
-0163 keggData(noProt | noUni) = [];
-0164 sequence(noProt | noUni) = [];
-0165 sequence = regexprep(sequence,'\s*','');
-0166 keggGene = regexprep(keggData,'ENTRY\s+(\S+?)\s.+','$1');
-0167
-0168 switch keggGeneID
-0169 case {'kegg',''}
-0170 gene_name = keggGene;
-0171 otherwise
-0172
-0173 keggGeneIDT = regexptranslate('escape',keggGeneID);
-0174 gene_name = regexprep(keggData,['.+' keggGeneIDT ': (\S+?)\n.+'],'$1');
-0175 noID = ~contains(keggData,keggGeneIDT);
-0176 if all(noID)
-0177 error(['None of the KEGG entries are annotated with the gene identifier ' keggGeneID])
-0178 else
-0179 gene_name(noID)= [];
-0180 keggData(noID) = [];
-0181 keggGene(noID) = [];
-0182 sequence(noID) = [];
-0183 uni(noID) = [];
-0184 end
-0185 end
-0186
-0187 EC_names = regexprep(keggData,'.*ORTHOLOGY.*\[EC:(.*?)\].*','$1');
-0188 EC_names(startsWith(EC_names,'ENTRY ')) = {''};
-0189
-0190 MW = cell(numel(sequence),1);
-0191 for i=1:numel(sequence)
-0192 if ~isempty(sequence{i})
-0193 MW{i} = num2str(round(calculateMW(sequence{i})));
-0194 end
-0195 end
-0196
-0197 pathway = regexprep(keggData,'.*PATHWAY\s+(.*?)(BRITE|MODULE).*','$1');
-0198 pathway(startsWith(pathway,'ENTRY ')) = {''};
-0199 pathway = strrep(pathway,[keggID '01100 Metabolic pathways'],'');
-0200 pathway = regexprep(pathway,'\n','');
-0201 pathway = regexprep(pathway,' ','');
-0202
-0203 out = [uni, gene_name, keggGene, EC_names, MW, pathway, sequence];
-0204 out = cell2table(out);
+0082 if ~isempty(databases.uniprot)
+0083 [uniqueIDs,uniqueIdx] = unique(databases.uniprot.ID,'stable');
+0084 if numel(uniqueIDs) < numel(databases.uniprot.ID)
+0085 duplID = setdiff(1:numel(databases.uniprot.ID),uniqueIdx);
+0086 dispEM(['Duplicate entries are found for the following proteins. '...
+0087 'Manually curate the ''uniprot.tsv'' file, or adjust the uniprot parameters '...
+0088 'in the model adapter:'],true,databases.uniprot.ID(duplID));
+0089 end
+0090 end
+0091 end
+0092
+0093
+0094 if any(strcmp(selectDatabase,{'kegg','both'}))
+0095 keggPath = fullfile(filePath,'kegg.tsv');
+0096 if ~exist(keggPath,'file')
+0097 if isempty(kegg.ID)
+0098 printOrange('WARNING: No kegg.ID is specified, unable to download KEGG DB.\n')
+0099 else
+0100 downloadKEGG(kegg.ID,keggPath,kegg.geneID);
+0101 end
+0102 end
+0103 if exist(keggPath,'file')
+0104 fid = fopen(keggPath,'r');
+0105 fileContent = textscan(fid,'%q %q %q %q %q %q %q','Delimiter',',','HeaderLines',0);
+0106 fclose(fid);
+0107 databases.kegg.uniprot = fileContent{1};
+0108 databases.kegg.genes = fileContent{2};
+0109 databases.kegg.keggGene = fileContent{3};
+0110 databases.kegg.eccodes = fileContent{4};
+0111 databases.kegg.MW = str2double(fileContent{5});
+0112 databases.kegg.pathway = fileContent{6};
+0113 databases.kegg.seq = fileContent{7};
+0114 else
+0115 databases.kegg = [];
+0116 end
+0117 end
+0118 end
+0119
+0120 function downloadKEGG(keggID, filePath, keggGeneID)
+0121
+0122 progressbar(['Downloading KEGG data for organism code ' keggID])
+0123 webOptions = weboptions('Timeout',30);
+0124 try
+0125 gene_list = webread(['http://rest.kegg.jp/list/' keggID],webOptions);
+0126 catch ME
+0127 switch ME.identifier
+0128 case 'MATLAB:webservices:HTTP400StatusCodeError'
+0129 error(['Unable to download data form KEGG with a potentially invalid ID: ' keggID ])
+0130 end
+0131 end
+0132 gene_list = regexpi(gene_list, '[^\n]+','match')';
+0133 gene_id = regexpi(gene_list,['(?<=' keggID ':)\S+'],'match');
+0134
+0135
+0136 genesPerQuery = 10;
+0137 queries = ceil(numel(gene_id)/genesPerQuery);
+0138 keggData = cell(numel(gene_id),1);
+0139 for i = 1:queries
+0140
+0141 firstIdx = i*genesPerQuery-(genesPerQuery-1);
+0142 lastIdx = i*genesPerQuery;
+0143 if lastIdx > numel(gene_id)
+0144 lastIdx = numel(gene_id);
+0145 end
+0146 url = ['http://rest.kegg.jp/get/' keggID ':' strjoin([gene_id{firstIdx:lastIdx}],['+' keggID ':'])];
+0147
+0148 retry = true;
+0149 while retry
+0150 try
+0151 retry = false;
+0152 out = webread(url,webOptions);
+0153 catch
+0154 retry = true;
+0155 end
+0156 end
+0157 outSplit = strsplit(out,['///' 10]);
+0158 if numel(outSplit) < lastIdx-firstIdx+2
+0159 error('KEGG returns less genes per query')
+0160 end
+0161 keggData(firstIdx:lastIdx) = outSplit(1:end-1);
+0162 progressbar(i/queries)
+0163 end
+0164
+0165
+0166 sequence = regexprep(keggData,'.*AASEQ\s+\d+\s+([A-Z\s])+?\s+NTSEQ.*','$1');
+0167
+0168 noProt = startsWith(sequence,'ENTRY ');
+0169 uni = regexprep(keggData,'.*UniProt: (\S+?)\s.*','$1');
+0170 noUni = startsWith(uni,'ENTRY ');
+0171 uni(noProt | noUni) = [];
+0172 keggData(noProt | noUni) = [];
+0173 sequence(noProt | noUni) = [];
+0174 sequence = regexprep(sequence,'\s*','');
+0175 keggGene = regexprep(keggData,'ENTRY\s+(\S+?)\s.+','$1');
+0176
+0177 switch keggGeneID
+0178 case {'kegg',''}
+0179 gene_name = keggGene;
+0180 otherwise
+0181
+0182 keggGeneIDT = regexptranslate('escape',keggGeneID);
+0183 gene_name = regexprep(keggData,['.+' keggGeneIDT ': (\S+?)\n.+'],'$1');
+0184 noID = ~contains(keggData,keggGeneIDT);
+0185 if all(noID)
+0186 error(['None of the KEGG entries are annotated with the gene identifier ' keggGeneID])
+0187 else
+0188 gene_name(noID)= [];
+0189 keggData(noID) = [];
+0190 keggGene(noID) = [];
+0191 sequence(noID) = [];
+0192 uni(noID) = [];
+0193 end
+0194 end
+0195
+0196 EC_names = regexprep(keggData,'.*ORTHOLOGY.*\[EC:(.*?)\].*','$1');
+0197 EC_names(startsWith(EC_names,'ENTRY ')) = {''};
+0198
+0199 MW = cell(numel(sequence),1);
+0200 for i=1:numel(sequence)
+0201 if ~isempty(sequence{i})
+0202 MW{i} = num2str(round(calculateMW(sequence{i})));
+0203 end
+0204 end
0205
-0206 writetable(out, filePath, 'FileType', 'text', 'WriteVariableNames',false);
-0207 fprintf('Model-specific KEGG database stored at %s\n',filePath);
-0208 end
+0206 pathway = regexprep(keggData,'.*PATHWAY\s+(.*?)(BRITE|MODULE).*','$1');
+0207 pathway(startsWith(pathway,'ENTRY ')) = {''};
+0208 pathway = strrep(pathway,[keggID '01100 Metabolic pathways'],'');
+0209 pathway = regexprep(pathway,'\n','');
+0210 pathway = regexprep(pathway,' ','');
+0211
+0212 out = [uni, gene_name, keggGene, EC_names, MW, pathway, sequence];
+0213 out = cell2table(out);
+0214
+0215 writetable(out, filePath, 'FileType', 'text', 'WriteVariableNames',false);
+0216 fprintf('Model-specific KEGG database stored at %s\n',filePath);
+0217 end
Generated by m2html © 2005