SPARQL examples for the EMI
View the Project on GitHub earth-metabolome-initiative/sparql-examples
List all human UniProtKB entries and their sequences, marking whether each listed sequence is the canonical sequence of the matching entry.
# Lists every human UniProtKB entry together with each of its sequences and
# reports in ?isCanonical whether that sequence is the entry's canonical one.
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?entry ?sequence ?isCanonical
WHERE {
  # We don't want to look into the UniParc graph, which would confuse
  # matters, so matching is restricted to this named graph.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    # We need the UniProt entries that are human.
    ?entry a up:Protein ;
           up:organism taxon:9606 ;
           up:sequence ?sequence .

    # If the sequence is a "Simple_Sequence" it is likely to be the
    # canonical sequence. ?likelyIsCanonical is either true or unbound.
    OPTIONAL {
      ?sequence a up:Simple_Sequence .
      BIND(true AS ?likelyIsCanonical)
    }

    # ... unless we are dealing with an external isoform,
    # see https://www.uniprot.org/help/canonical_and_isoforms
    # The FILTER acts as the condition of this OPTIONAL, so the block is
    # only attempted for rows already flagged above. ?isComplicated is
    # either true or unbound, never false.
    OPTIONAL {
      FILTER(?likelyIsCanonical)
      ?sequence a up:External_Sequence .
      BIND(true AS ?isComplicated)
    }

    # If it is an external isoform, its id would not match the entry's
    # primary accession, so compare the two:
    #   - SUBSTR(..., 34) drops the first 33 characters of the sequence IRI
    #     (the length of "http://purl.uniprot.org/isoforms/"; assumes
    #     sequence IRIs live in that namespace -- TODO confirm),
    #   - STRBEFORE(..., '-') keeps the part before the isoform suffix,
    #   - STRENDS checks that the entry IRI ends with that accession.
    # BOUND() is used for the condition because ?isComplicated is unbound
    # on most rows; using an unbound variable directly as the IF condition
    # is an evaluation error in SPARQL 1.1 and would leave ?isCanonical
    # unbound on those rows. When the sequence is not a Simple_Sequence,
    # ?likelyIsCanonical is unbound and ?isCanonical stays unbound too.
    BIND(
      IF(
        BOUND(?isComplicated),
        STRENDS(STR(?entry), STRBEFORE(SUBSTR(STR(?sequence), 34), '-')),
        ?likelyIsCanonical
      ) AS ?isCanonical
    )
  }
}
graph TD
%% Diagram of the query above: variables are round nodes, IRIs are stadium
%% nodes, BIND expressions are parallelograms.
  classDef projected fill:lightgreen;
  classDef literal fill:orange;
  classDef iri fill:yellow;
%% Variables (projected ones are highlighted in green).
  v2("?entry"):::projected
  v5("?isCanonical"):::projected
%% ?isComplicated needs its own node id; it previously shared v4 with
%% ?likelyIsCanonical, which merged the two variables into one node.
  v6("?isComplicated")
  v4("?likelyIsCanonical")
  v3("?sequence"):::projected
%% IRIs referenced by the query.
  c8(["up:External_Sequence"]):::iri
  c2(["up:Protein"]):::iri
  c7(["up:Simple_Sequence"]):::iri
  c5(["taxon:9606"]):::iri
%% Required triple patterns.
  v2 --"a"--> c2
  v2 --"up:organism"--> c5
  v2 --"up:sequence"--> v3
%% First OPTIONAL: a Simple_Sequence sets ?likelyIsCanonical to true.
  subgraph optional0["(optional)"]
  style optional0 fill:#bbf,stroke-dasharray: 5 5;
    v3 -."a".-> c7
    bind0[/"'true^^xsd:boolean'"/]
    bind0 --as--o v4
  end
%% Second OPTIONAL: an External_Sequence sets ?isComplicated to true.
  subgraph optional1["(optional)"]
  style optional1 fill:#bbf,stroke-dasharray: 5 5;
    v3 -."a".-> c8
    bind1[/"'true^^xsd:boolean'"/]
    bind1 --as--o v6
  end
%% Final BIND: inputs are listed in the order they appear in the expression.
  bind2[/"if(?isComplicated,ends-with(str(?entry),substring-before(substring(str(?sequence),'34^^xsd:integer'),'-')),?likelyIsCanonical)"/]
  v6 --o bind2
  v2 --o bind2
  v3 --o bind2
  v4 --o bind2
  bind2 --as--o v5