Protein Sequence Consequence
Definition and Information Model
Note
This data class is at a trial use maturity level and may change in future releases. Maturity levels are described in the GKS Maturity Model.
Computational Definition
A change that occurs in a protein sequence as a result of genomic changes. Due to the degenerate nature of the genetic code, there are often several genomic changes that can cause a protein sequence consequence. The protein sequence consequence, like a Canonical Allele, is defined by an Allele that is representative of a collection of congruent Protein Alleles that share the same altered codon(s).
Information Model
A ProteinSequenceConsequence is a Categorical Variant with exactly one constraint:
A Defining Allele Constraint with the .relations array containing only a translation_of code. This constraint MUST refer to a protein variant for the allele.
Examples
The following are example implementations that satisfy the CanonicalAllele recipe:
NM_004958.4(MTOR):c.5992_5993del (p.Met1998fs)
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://w3id.org/ga4gh/schema/cat-vrs/1.0.0/json/example_canonicalAllele-ex1",
"title": "example_canonicalAllele-ex1",
"type": "CategoricalVariant",
"id": "clinvar:662001",
"name": "NM_004958.4(MTOR):c.5992_5993del (p.Met1998fs)",
"description": "An example canonical allele.",
"aliases": [
"NM_004958.4:c.5992_5993del",
"NC_000001.11:g.11128044_11128045del",
"NC_000001.10:g.11188101_11188102del",
"NP_004949.1:p.Met1998fs",
"NG_033239.1:g.139507_139508del",
"LRG_734:g.139507_139508del",
"LRG_734t1:c.5992_5993del"
],
"extensions": [
{
"name": "cytogenetic location",
"value": "1p36.22"
},
{
"name": "clinvar variation type",
"value": "Deletion"
},
{
"name": "hgvs list",
"value": [
{
"nucleotideExpression": {
"syntax": "hgvs.g",
"value": "NC_000001.11:g.11128044_11128045del"
},
"nucleotideType": "genomic"
},
{
"nucleotideExpression": {
"syntax": "hgvs.c",
"value": "NM_004958.4:c.5992_5993del"
},
"nucleotideType": "coding",
"maneSelect": true,
"proteinExpression": {
"syntax": "hgvs.p",
"value": "NP_004949.1:p.Met1998fs"
},
"molecularConsequence": {
"name": "frameshift_variant",
"system": "http://www.sequenceontology.org/browser/",
"systemVersion": "release_2.5.3",
"code": "SO:0001589",
"iris": [
"http://www.sequenceontology.org/browser/release_2.5.3/term/SO:0001589"
]
}
},
{
"nucleotideExpression": {
"syntax": "hgvs.g",
"value": "NG_033239.1:g.139507_139508del"
},
"nucleotideType": "genomic"
},
{
"nucleotideExpression": {
"syntax": "hgvs.g",
"value": "LRG_734:g.139507_139508del"
},
"nucleotideType": "genomic"
},
{
"nucleotideExpression": {
"syntax": "hgvs.c",
"value": "LRG_734t1:c.5992_5993del"
},
"nucleotideType": "coding"
}
]
}
],
"constraints": [
{
"type": "DefiningAlleleConstraint",
"allele": {
"id": "ga4gh:VA.0TMQdMT2OBisJ9FI4tkzaBtxGB7r8FfJ",
"type": "Allele",
"name": "NM_004958.4:c.5992_5993del",
"description": "VRS variation of NM_004958.4:c.5992_5993del, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
"extensions": [
{
"name": "clinvar vcf",
"value": "1-11128043-CAT-C"
}
],
"digest": "0TMQdMT2OBisJ9FI4tkzaBtxGB7r8FfJ",
"expressions": [
{
"syntax": "spdi",
"value": "NC_000001.11:11128043:AT:"
},
{
"syntax": "hgvs.g",
"value": "NC_000001.11:g.11128044_11128045del"
},
{
"syntax": "hgvs.c",
"value": "NM_004958.4:c.5992_5993del"
},
{
"syntax": "hgvs.p",
"value": "NP_004949.1:p.Met1998fs"
}
],
"location": {
"id": "ga4gh:SL.gLe4d4Seuxn5fLCfO_2g34gF86vpGvo1",
"description": "Coding DNA (cDNA) positions 5992 to 5993 of the MTOR MANE Select coding transcript (refseq:NM_004958.4), corresponding to genomic positions 11128044 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
"extensions": [
{
"name": "GRCh38 1-based, inclusive interval",
"value": "chr1:11128044-11128045",
"description": "Genomic positions 11128044 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
},
{
"name": "GRCh38 0-based, half-open interval",
"value": "chr1:11128043-11128045",
"description": "Genomic positions 11128043 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
}
],
"type": "SequenceLocation",
"digest": "gLe4d4Seuxn5fLCfO_2g34gF86vpGvo1",
"sequenceReference": {
"id": "refseq:NM_004958.4",
"name": "NM_004958.4",
"description": "The MANE Select (GRCh38) coding transcript for MTOR.",
"aliases": [
"ensembl:ENST00000361445.9",
"ga4gh:SQ.QheGYEnKbwNpM3LulbPTBQhyBSyZwuYm"
],
"type": "SequenceReference",
"refgetAccession": "SQ.QheGYEnKbwNpM3LulbPTBQhyBSyZwuYm",
"residueAlphabet": "na"
},
"start": 6112,
"end": 6114,
"sequence": "AT"
},
"state": {
"type": "ReferenceLengthExpression",
"length": 0,
"sequence": "",
"repeatSubunitLength": 2
}
},
"relations": [
{
"primaryCoding": {
"code": "liftover_to",
"system": "ga4gh-gks-term:allele-relation"
}
},
{
"primaryCoding": {
"code": "transcribed_to",
"system": "http://www.sequenceontology.org",
"iris": [
"http://www.sequenceontology.org/browser/current_release/term/transcribed_to"
]
}
}
]
}
],
"mappings": [
{
"coding": {
"system": "https://www.ncbi.nlm.nih.gov/clinvar",
"code": "662001",
"iris": [
"https://www.ncbi.nlm.nih.gov/clinvar/variation/662001"
]
},
"relation": "exactMatch"
},
{
"coding": {
"system": "https://reg.clinicalgenome.org",
"code": "CA915941124",
"iris": [
"https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=CA915941124"
]
},
"relation": "relatedMatch"
},
{
"coding": {
"system": "https://varsome.com",
"code": "hg38/rs1570942058",
"iris": [
"https://varsome.com/variant/hg38/rs1570942058"
]
},
"relation": "relatedMatch"
},
{
"coding": {
"system": "https://www.ncbi.nlm.nih.gov/snp",
"code": "rs1570942058",
"iris": [
"https://www.ncbi.nlm.nih.gov/snp/rs1570942058"
]
},
"relation": "relatedMatch"
}
],
"members": [
{
"id": "ga4gh:VA.PN-6_l2_yI1UPBRCtFnWkR52iZXKVJ8b",
"type": "Allele",
"name": "NC_000001.11:g.11128044_11128045del",
"description": "VRS variation of NC_000001.11:g.11128044_11128045del, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
"extensions": [
{
"name": "clinvar vcf",
"value": "1-11128043-CAT-C"
}
],
"digest": "PN-6_l2_yI1UPBRCtFnWkR52iZXKVJ8b",
"expressions": [
{
"syntax": "spdi",
"value": "NC_000001.11:11128043:AT:"
},
{
"syntax": "hgvs.g",
"value": "NC_000001.11:g.11128044_11128045del"
},
{
"syntax": "hgvs.c",
"value": "NM_004958.4:c.5992_5993del"
},
{
"syntax": "hgvs.p",
"value": "NP_004949.1:p.Met1998fs"
}
],
"location": {
"id": "ga4gh:SL.5-SKfXZ941W7JbZW3UmQKtijyUfd6d7z",
"description": "Genomic positions 11,128,044 to 11,128,045 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
"extensions": [
{
"name": "GRCh38 1-based, inclusive interval",
"value": "chr1:11128044-11128045",
"description": "Genomic positions 11,128,044 to 11,128,045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
},
{
"name": "GRCh38 0-based, half-open interval",
"value": "chr1:11128043-11128045",
"description": "Genomic positions 11,128,043 to 11,128,045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
}
],
"type": "SequenceLocation",
"digest": "5-SKfXZ941W7JbZW3UmQKtijyUfd6d7z",
"sequenceReference": {
"id": "refseq:NC_000001.11",
"name": "NC_000001.11",
"description": "Reference sequence for GRCh38 chromosome 1.",
"aliases": [
"GRCh38:1",
"GRCh38:chr1",
"ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"
],
"type": "SequenceReference",
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
"residueAlphabet": "na"
},
"start": 11128043,
"end": 11128045,
"sequence": "AT"
},
"state": {
"type": "ReferenceLengthExpression",
"length": 0,
"sequence": "",
"repeatSubunitLength": 2
}
},
{
"id": "ga4gh:VA.0TMQdMT2OBisJ9FI4tkzaBtxGB7r8FfJ",
"type": "Allele",
"name": "NM_004958.4:c.5992_5993del",
"description": "VRS variation of NM_004958.4:c.5992_5993del, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
"extensions": [
{
"name": "clinvar vcf",
"value": "1-11128043-CAT-C"
}
],
"digest": "0TMQdMT2OBisJ9FI4tkzaBtxGB7r8FfJ",
"expressions": [
{
"syntax": "spdi",
"value": "NC_000001.11:11128043:AT:"
},
{
"syntax": "hgvs.g",
"value": "NC_000001.11:g.11128044_11128045del"
},
{
"syntax": "hgvs.c",
"value": "NM_004958.4:c.5992_5993del"
},
{
"syntax": "hgvs.p",
"value": "NP_004949.1:p.Met1998fs"
}
],
"location": {
"id": "ga4gh:SL.gLe4d4Seuxn5fLCfO_2g34gF86vpGvo1",
"description": "Coding DNA (cDNA) positions 5992 to 5993 of the MTOR MANE Select coding transcript (refseq:NM_004958.4), corresponding to genomic positions 11128044 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
"extensions": [
{
"name": "GRCh38 1-based, inclusive interval",
"value": "chr1:11128044-11128045",
"description": "Genomic positions 11128044 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
},
{
"name": "GRCh38 0-based, half-open interval",
"value": "chr1:11128043-11128045",
"description": "Genomic positions 11128043 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
}
],
"type": "SequenceLocation",
"digest": "gLe4d4Seuxn5fLCfO_2g34gF86vpGvo1",
"sequenceReference": {
"id": "refseq:NM_004958.4",
"name": "NM_004958.4",
"description": "The MANE Select (GRCh38) coding transcript for MTOR.",
"aliases": [
"ensembl:ENST00000361445.9",
"ga4gh:SQ.QheGYEnKbwNpM3LulbPTBQhyBSyZwuYm"
],
"type": "SequenceReference",
"refgetAccession": "SQ.QheGYEnKbwNpM3LulbPTBQhyBSyZwuYm",
"residueAlphabet": "na"
},
"start": 6112,
"end": 6114,
"sequence": "AT"
},
"state": {
"type": "ReferenceLengthExpression",
"length": 0,
"sequence": "",
"repeatSubunitLength": 2
}
}
],
"maturity": "trial use"
}
NC_000001.11:g.1699974C>G
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://w3id.org/ga4gh/schema/cat-vrs/1.0.0/json/example_canonicalAllele-ex2",
"title": "example_canonicalAllele-ex2",
"type": "CategoricalVariant",
"id": "clingen:CA415424538",
"name": "NC_000001.11:g.1699974C>G",
"description": "An example canonical allele.",
"aliases": [
"NC_000001.11:g.1699974C>G",
"NC_000001.10:g.1631413C>G"
],
"extensions": [
{
"name": "cytogenetic location",
"value": "1p36.33"
},
{
"name": "hgvs list",
"value": [
{
"nucleotideExpression": {
"syntax": "hgvs.g",
"value": "NC_000001.11:g.1699974C>G"
}
},
{
"nucleotideExpression": {
"syntax": "hgvs.g",
"value": "NC_000001.10:g.1631413C>G"
}
}
]
}
],
"constraints": [
{
"type": "DefiningAlleleConstraint",
"allele": {
"id": "ga4gh:VA.WjRY1EXgci6Nd0M2yI86Ue_OOnfnb-Ss",
"type": "Allele",
"name": "NC_000001.11:g.1699974C>G",
"description": "VRS Allele of NC_000001.11:g.1699974C>G, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
"digest": "WjRY1EXgci6Nd0M2yI86Ue_OOnfnb-Ss",
"expressions": [
{
"syntax": "hgvs.g",
"value": "NC_000001.11:g.1699974C>G"
}
],
"location": {
"id": "ga4gh:SL.YmbtK6FLqlk2bLjpFwH-y7k4RLq_t7L3",
"description": "Genomic position 1699974 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
"extensions": [
{
"name": "GRCh38 1-based, genomic position",
"value": "chr1:1699974",
"description": "Genomic position 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38)."
},
{
"name": "GRCh38 1-based, inclusive interval notation",
"value": "chr1:1699974-1699974",
"description": "Genomic positions 1,699,974 to 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
},
{
"name": "GRCh38 0-based, half-open interval notation",
"value": "chr1:1699973-1699974",
"description": "Genomic positions 1,699,973 to 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
}
],
"type": "SequenceLocation",
"digest": "YmbtK6FLqlk2bLjpFwH-y7k4RLq_t7L3",
"sequenceReference": {
"id": "refseq:NC_000001.11",
"name": "NC_000001.11",
"description": "Reference sequence for GRCh38 chromosome 1.",
"aliases": [
"GRCh38:1",
"GRCh38:chr1",
"ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"
],
"type": "SequenceReference",
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
"residueAlphabet": "na"
},
"start": 1699973,
"end": 1699974,
"sequence": "C"
},
"state": {
"type": "LiteralSequenceExpression",
"sequence": "G"
}
},
"relations": [
{
"primaryCoding": {
"code": "liftover_to",
"system": "ga4gh-gks-term:allele-relation"
}
},
{
"primaryCoding": {
"code": "transcribed_to",
"system": "http://www.sequenceontology.org",
"iris": [
"http://www.sequenceontology.org/browser/current_release/term/transcribed_to"
]
}
}
]
}
],
"mappings": [
{
"coding": {
"system": "https://reg.clinicalgenome.org/",
"code": "CA415424538",
"iris": [
"https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=CA415424538"
]
},
"relation": "exactMatch"
},
{
"coding": {
"system": "https://www.ncbi.nlm.nih.gov/snp/",
"code": "rs1391950675",
"iris": [
"https://www.ncbi.nlm.nih.gov/snp/rs1391950675"
]
},
"relation": "relatedMatch"
},
{
"coding": {
"system": "https://gnomad.broadinstitute.org",
"code": "1-1631413-C-G",
"iris": [
"https://gnomad.broadinstitute.org/variant/1-1631413-C-G?dataset=gnomad_r2_1"
],
"extensions": [
{
"name": "dataset",
"value": "gnomad_r2_1"
}
]
},
"relation": "relatedMatch"
},
{
"coding": {
"system": "https://gnomad.broadinstitute.org",
"code": "1-1699974-C-G",
"iris": [
"https://gnomad.broadinstitute.org/variant/1-1699974-C-G?dataset=gnomad_r3"
],
"extensions": [
{
"name": "dataset",
"value": "gnomad_r3"
}
]
},
"relation": "relatedMatch"
},
{
"coding": {
"system": "https://gnomad.broadinstitute.org",
"code": "chr1-1699974-C-G",
"iris": [
"https://gnomad.broadinstitute.org/variant/chr1-1699974-C-G?dataset=gnomad_r4"
],
"extensions": [
{
"name": "dataset",
"value": "gnomad_r4"
}
]
},
"relation": "relatedMatch"
}
],
"members": [
{
"id": "ga4gh:VA.WjRY1EXgci6Nd0M2yI86Ue_OOnfnb-Ss",
"type": "Allele",
"name": "NC_000001.11:g.1699974C>G",
"description": "VRS variation of NC_000001.11:g.1699974C>G, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
"digest": "WjRY1EXgci6Nd0M2yI86Ue_OOnfnb-Ss",
"expressions": [
{
"syntax": "hgvs.g",
"value": "NC_000001.11:g.1699974C>G"
}
],
"location": {
"id": "ga4gh:SL.YmbtK6FLqlk2bLjpFwH-y7k4RLq_t7L3",
"description": "Genomic position 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
"type": "SequenceLocation",
"extensions": [
{
"name": "GRCh38 1-based, genomic position",
"value": "chr1:1699974",
"description": "Genomic position 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38)."
},
{
"name": "GRCh38 1-based, inclusive interval notation",
"value": "chr1:1699974-1699974",
"description": "Genomic positions 1,699,974 to 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
},
{
"name": "GRCh38 0-based, half-open interval notation",
"value": "chr1:1699973-1699974",
"description": "Genomic positions 1,699,973 to 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
}
],
"digest": "YmbtK6FLqlk2bLjpFwH-y7k4RLq_t7L3",
"sequenceReference": {
"id": "refseq:NC_000001.11",
"name": "NC_000001.11",
"description": "Reference sequence for GRCh38 chromosome 1.",
"aliases": [
"GRCh38:1",
"GRCh38:chr1",
"ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"
],
"type": "SequenceReference",
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
"residueAlphabet": "na"
},
"start": 1699973,
"end": 1699974,
"sequence": "C"
},
"state": {
"type": "LiteralSequenceExpression",
"sequence": "G"
}
},
{
"id": "ga4gh:VA.WW0r6Dh-z_ftn9yS-i0f1Y62JNCpVed2",
"type": "Allele",
"name": "NC_000001.10:g.1631413C>G",
"description": "VRS variation of NC_000001.10:g.1631413C>G, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
"digest": "WW0r6Dh-z_ftn9yS-i0f1Y62JNCpVed2",
"expressions": [
{
"syntax": "hgvs.g",
"value": "NC_000001.10:g.1631413C>G"
}
],
"location": {
"id": "ga4gh:SL.QsC4TgLsfR6-TatBj1sVKAgfv9bGYSxb",
"description": "Genomic position 1,631,413 on chromosome 1 (refseq:NC_000001.10, GRCh37).",
"type": "SequenceLocation",
"extensions": [
{
"name": "GRCh37 1-based, genomic position",
"value": "chr1:1631413",
"description": "Genomic position 1,631,413 on chromosome 1 (refseq:NC_000001.10, GRCh37)."
},
{
"name": "GRCh37 1-based, inclusive interval notation",
"value": "chr1:1631413-1631413",
"description": "Genomic positions 1,631,413 to 1,631,413 on chromosome 1 (refseq:NC_000001.10, GRCh37); 1-based, inclusive interval notation."
},
{
"name": "GRCh37 0-based, half-open interval notation",
"value": "chr1:1631412-1631413",
"description": "Genomic positions 1,631,412 to 1,631,413 on chromosome 1 (refseq:NC_000001.10, GRCh37); 0-based, half-open interval notation."
}
],
"digest": "QsC4TgLsfR6-TatBj1sVKAgfv9bGYSxb",
"sequenceReference": {
"id": "refseq:NC_000001.10",
"name": "NC_000001.10",
"description": "Reference sequence for GRCh37 chromosome 1.",
"aliases": [
"GRCh37:1",
"GRCh37:chr1",
"ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU"
],
"type": "SequenceReference",
"refgetAccession": "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU",
"residueAlphabet": "na"
},
"start": 1631412,
"end": 1631413,
"sequence": "C"
},
"state": {
"type": "LiteralSequenceExpression",
"sequence": "G"
}
}
],
"maturity": "trial use"
}