"""Annotate genes of interest with their GO terms.

Reads a list of gene names (one per line) and a GO annotation file laid out
as alternating lines -- a '>'-prefixed gene ID followed by the line holding
its GO terms -- then writes two reports: genes that have GO terms, and genes
that do not appear in the annotation file.
"""


def _read_lines(path):
    """Return every line of *path* with its trailing line ending removed."""
    with open(path, 'r') as handle:
        # Strip '\r' as well so files saved with Windows line endings still
        # match gene IDs exactly.
        return [line.rstrip('\r\n') for line in handle]


def _load_go_terms(path):
    """Return a dict mapping each gene ID line to the line that follows it.

    Lines are paired as (1st, 2nd), (3rd, 4th), ...; when an ID occurs more
    than once the last pairing wins. A dangling final line with no partner
    is ignored instead of raising IndexError.
    """
    lines = _read_lines(path)
    # zip stops at the shorter slice, which drops an unpaired trailing line.
    return dict(zip(lines[0::2], lines[1::2]))


def main(genes_path='diffexpressedgenes.txt',
         go_path='contigs_GO.txt',
         found_path='genes_with_GOterms.txt',
         missing_path='genes_lacking_GOterms.txt'):
    """Write the GO terms of the genes of interest to text files.

    genes_path   -- input, one gene name per line (without the leading '>').
    go_path      -- input, alternating gene ID / GO term lines.
    found_path   -- output, one 'ID; GO terms' line per annotated gene,
                    written once per gene in the order of first appearance
                    in genes_path.
    missing_path -- output, one line per listed gene that has no entry in
                    go_path (repeated names are repeated here).

    Prints '<ID> in dictionary' for every listed gene that has GO terms and
    returns a dict mapping those gene IDs to their GO term line.
    """
    # Gene IDs in the annotation file start with '>', so add it to each name.
    gene_names = ['>' + name for name in _read_lines(genes_path)]
    go_terms_by_gene = _load_go_terms(go_path)

    annotated = {}
    with open(found_path, 'w') as found:
        with open(missing_path, 'w') as missing:
            for gene in gene_names:
                if gene in go_terms_by_gene:
                    if gene not in annotated:
                        terms = go_terms_by_gene[gene]
                        annotated[gene] = terms
                        found.write(gene + '; ' + terms + '\n')
                    # Parenthesised single argument prints identically on
                    # Python 2 and Python 3.
                    print(gene + ' in dictionary')
                else:
                    missing.write(gene + '\n')
    return annotated


if __name__ == '__main__':
    main()