File size: 1,635 Bytes
a0303d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import streamlit as st
from tname import *


def Strip(seq_file):
    """Join wrapped sequence lines of a '>'-headed sequence file into a new file.

    The file at ``seq_file.name`` is read line by line. Every line is copied
    to the output unchanged, except that a non-header line (one that does not
    start with ``>``) which is directly followed by another non-header line
    is reduced to its first whitespace-delimited token and written without a
    line terminator. The effect is that consecutive sequence lines end up
    concatenated on a single output line. The last input line is always
    copied verbatim.

    Args:
        seq_file: Object whose ``name`` attribute is the path of a readable
            text file (presumably a FASTA-style upload -- confirm with the
            caller).

    Returns:
        The path of the output file, ``Name() + "input.tsv"``. The file is
        opened in append mode, so pre-existing content at that path is kept.
    """
    with open(seq_file.name, "r") as f:
        contents = f.readlines()
    # Name() comes from the project's `tname` module; it is used here as a
    # string prefix for the result file name.
    ina = Name() + r"input.tsv"

    pieces = []
    # Pair every line with its successor; the final line has no successor and
    # is handled separately below.
    for line, next_line in zip(contents, contents[1:]):
        if not line.startswith(">") and not next_line.startswith(">"):
            # Sequence line continued on the next line: keep only the first
            # token and drop the newline so the two lines are joined.
            # A blank/whitespace-only line has no tokens and contributes
            # nothing (previously this raised IndexError).
            # NOTE(review): anything after the first whitespace on such a
            # line is discarded, as in the original -- confirm this is the
            # intended meaning of "remove spaces".
            tokens = line.split()
            pieces.append(tokens[0] if tokens else "")
        else:
            pieces.append(line)

    # The last line is copied as-is; an empty input yields an empty output
    # instead of an IndexError.
    if contents:
        pieces.append(contents[-1])

    # Single open instead of re-opening the file for every line.
    with open(ina, "a") as f:
        f.writelines(pieces)
    return ina


def Merge(smi_file, seq_file):
    """Build a tab-separated table pairing one SMILES string with each sequence.

    The first line of the file at ``smi_file.name`` (with newline characters
    stripped from both ends) is used as the SMILES value for every row. The
    file at ``seq_file.name`` is read in full and every odd-indexed line
    (2nd, 4th, ...) is emitted as a sequence; even-indexed lines are ignored.
    This assumes the file strictly alternates header and sequence lines --
    confirm with the caller.

    Output layout: one header row, then one row per sequence of the form
    ``>seq<k>\\t<smiles>\\t<sequence line>`` with ``k`` counting from 0. The
    sequence line is written verbatim, including its own line terminator.

    Args:
        smi_file: Object whose ``name`` attribute is the path of a text file
            whose first line holds the SMILES string.
        seq_file: Object whose ``name`` attribute is the path of the sequence
            text file.

    Returns:
        The path of the output file, ``Name() + "kcat_input.tsv"``. The file
        is opened in append mode, so pre-existing content at that path is
        kept.
    """
    with open(smi_file.name, "r") as f1:
        smile = f1.readline()
    smile = smile.strip("\n")

    with open(seq_file.name, "r") as f:
        contents = f.readlines()

    # Name() comes from the project's `tname` module; it is used here as a
    # string prefix for the result file name.
    name = Name() + r"kcat_input.tsv"

    # One handle for the header and all rows instead of re-opening the file
    # for every record.
    with open(name, "a") as out:
        out.write("Substrate Name	Substrate SMILES	Protein Sequence")
        out.write("\n")
        # contents[1::2] selects the odd-indexed lines; enumerate supplies
        # the 0-based record number without float arithmetic.
        for record_index, sequence in enumerate(contents[1::2]):
            out.write(f">seq{record_index}\t{smile}\t{sequence}")
    return name