Canonical Allele

Definition and Information Model

Note

This data class is at a trial use maturity level and may change in future releases. Maturity levels are described in the GKS Maturity Model.

Computational Definition

A canonical allele is defined by an Allele that is representative of a collection of congruent Alleles, each of which depicts the same nucleic acid on different underlying reference sequences. Congruent representations of an Allele often exist across different genome assemblies and associated cDNA transcript representations.

Information Model

A CanonicalAllele is a Categorical Variant with exactly one constraint:

  1. A Defining Allele Constraint with the relations array containing both liftover_to and transcribed_to codes. This constraint MUST refer to a genomic variant for the allele.

Examples

The following are example implementations that satisfy the CanonicalAllele recipe:

NM_004958.4(MTOR):c.5992_5993del (p.Met1998fs)
{
   "$schema": "https://json-schema.org/draft/2020-12/schema",
   "$id": "https://w3id.org/ga4gh/schema/cat-vrs/1.0.0/json/example_canonicalAllele-ex1",
   "title": "example_canonicalAllele-ex1",
   "type": "CategoricalVariant",
   "id": "clinvar:662001",
   "name": "NM_004958.4(MTOR):c.5992_5993del (p.Met1998fs)",
   "description": "An example canonical allele.",
   "aliases": [
      "NM_004958.4:c.5992_5993del",
      "NC_000001.11:g.11128044_11128045del",
      "NC_000001.10:g.11188101_11188102del",
      "NP_004949.1:p.Met1998fs",
      "NG_033239.1:g.139507_139508del",
      "LRG_734:g.139507_139508del",
      "LRG_734t1:c.5992_5993del"
   ],
   "extensions": [
      {
         "name": "cytogenetic location",
         "value": "1p36.22"
      },
      {
         "name": "clinvar variation type",
         "value": "Deletion"
      },
      {
         "name": "hgvs list",
         "value": [
            {
               "nucleotideExpression": {
                  "syntax": "hgvs.g",
                  "value": "NC_000001.11:g.11128044_11128045del"
               },
               "nucleotideType": "genomic"
            },
            {
               "nucleotideExpression": {
                  "syntax": "hgvs.c",
                  "value": "NM_004958.4:c.5992_5993del"
               },
               "nucleotideType": "coding",
               "maneSelect": true,
               "proteinExpression": {
                  "syntax": "hgvs.p",
                  "value": "NP_004949.1:p.Met1998fs"
               },
               "molecularConsequence": {
                  "name": "frameshift_variant",
                  "system": "http://www.sequenceontology.org/browser/",
                  "systemVersion": "release_2.5.3",
                  "code": "SO:0001589",
                  "iris": [
                     "http://www.sequenceontology.org/browser/release_2.5.3/term/SO:0001589"
                  ]
               }
            },
            {
               "nucleotideExpression": {
                  "syntax": "hgvs.g",
                  "value": "NG_033239.1:g.139507_139508del"
               },
               "nucleotideType": "genomic"
            },
            {
               "nucleotideExpression": {
                  "syntax": "hgvs.g",
                  "value": "LRG_734:g.139507_139508del"
               },
               "nucleotideType": "genomic"
            },
            {
               "nucleotideExpression": {
                  "syntax": "hgvs.c",
                  "value": "LRG_734t1:c.5992_5993del"
               },
               "nucleotideType": "coding"
            }
         ]
      }
   ],
   "constraints": [
      {
         "type": "DefiningAlleleConstraint",
         "allele": {
            "id": "ga4gh:VA.0TMQdMT2OBisJ9FI4tkzaBtxGB7r8FfJ",
            "type": "Allele",
            "name": "NM_004958.4:c.5992_5993del",
            "description": "VRS variation of NM_004958.4:c.5992_5993del, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
            "extensions": [
               {
                  "name": "clinvar vcf",
                  "value": "1-11128043-CAT-C"
               }
            ],
            "digest": "0TMQdMT2OBisJ9FI4tkzaBtxGB7r8FfJ",
            "expressions": [
               {
                  "syntax": "spdi",
                  "value": "NC_000001.11:11128043:AT:"
               },
               {
                  "syntax": "hgvs.g",
                  "value": "NC_000001.11:g.11128044_11128045del"
               },
               {
                  "syntax": "hgvs.c",
                  "value": "NM_004958.4:c.5992_5993del"
               },
               {
                  "syntax": "hgvs.p",
                  "value": "NP_004949.1:p.Met1998fs"
               }
            ],
            "location": {
               "id": "ga4gh:SL.gLe4d4Seuxn5fLCfO_2g34gF86vpGvo1",
               "description": "Coding DNA (cDNA) positions 5992 to 5993 of the MTOR MANE Select coding transcript (refseq:NM_004958.4), corresponding to genomic positions 11128044 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
               "extensions": [
                  {
                     "name": "GRCh38 1-based, inclusive interval",
                     "value": "chr1:11128044-11128045",
                     "description": "Genomic positions 11128044 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
                  },
                  {
                     "name": "GRCh38 0-based, half-open interval",
                     "value": "chr1:11128043-11128045",
                     "description": "Genomic positions 11128043 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
                  }
               ],
               "type": "SequenceLocation",
               "digest": "gLe4d4Seuxn5fLCfO_2g34gF86vpGvo1",
               "sequenceReference": {
                  "id": "refseq:NM_004958.4",
                  "name": "NM_004958.4",
                  "description": "The MANE Select (GRCh38) coding transcript for MTOR.",
                  "aliases": [
                     "ensembl:ENST00000361445.9",
                     "ga4gh:SQ.QheGYEnKbwNpM3LulbPTBQhyBSyZwuYm"
                  ],
                  "type": "SequenceReference",
                  "refgetAccession": "SQ.QheGYEnKbwNpM3LulbPTBQhyBSyZwuYm",
                  "residueAlphabet": "na"
               },
               "start": 6112,
               "end": 6114,
               "sequence": "AT"
            },
            "state": {
               "type": "ReferenceLengthExpression",
               "length": 0,
               "sequence": "",
               "repeatSubunitLength": 2
            }
         },
         "relations": [
            {
               "primaryCoding": {
                  "code": "liftover_to",
                  "system": "ga4gh-gks-term:allele-relation"
               }
            },
            {
               "primaryCoding": {
                  "code": "transcribed_to",
                  "system": "http://www.sequenceontology.org",
                  "iris": [
                     "http://www.sequenceontology.org/browser/current_release/term/transcribed_to"
                  ]
               }
            }
         ]
      }
   ],
   "mappings": [
      {
         "coding": {
            "system": "https://www.ncbi.nlm.nih.gov/clinvar",
            "code": "662001",
            "iris": [
               "https://www.ncbi.nlm.nih.gov/clinvar/variation/662001"
            ]
         },
         "relation": "exactMatch"
      },
      {
         "coding": {
            "system": "https://reg.clinicalgenome.org",
            "code": "CA915941124",
            "iris": [
               "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=CA915941124"
            ]
         },
         "relation": "relatedMatch"
      },
      {
         "coding": {
            "system": "https://varsome.com",
            "code": "hg38/rs1570942058",
            "iris": [
               "https://varsome.com/variant/hg38/rs1570942058"
            ]
         },
         "relation": "relatedMatch"
      },
      {
         "coding": {
            "system": "https://www.ncbi.nlm.nih.gov/snp",
            "code": "rs1570942058",
            "iris": [
               "https://www.ncbi.nlm.nih.gov/snp/rs1570942058"
            ]
         },
         "relation": "relatedMatch"
      }
   ],
   "members": [
      {
         "id": "ga4gh:VA.PN-6_l2_yI1UPBRCtFnWkR52iZXKVJ8b",
         "type": "Allele",
         "name": "NC_000001.11:g.11128044_11128045del",
         "description": "VRS variation of NC_000001.11:g.11128044_11128045del, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
         "extensions": [
            {
               "name": "clinvar vcf",
               "value": "1-11128043-CAT-C"
            }
         ],
         "digest": "PN-6_l2_yI1UPBRCtFnWkR52iZXKVJ8b",
         "expressions": [
            {
               "syntax": "spdi",
               "value": "NC_000001.11:11128043:AT:"
            },
            {
               "syntax": "hgvs.g",
               "value": "NC_000001.11:g.11128044_11128045del"
            },
            {
               "syntax": "hgvs.c",
               "value": "NM_004958.4:c.5992_5993del"
            },
            {
               "syntax": "hgvs.p",
               "value": "NP_004949.1:p.Met1998fs"
            }
         ],
         "location": {
            "id": "ga4gh:SL.5-SKfXZ941W7JbZW3UmQKtijyUfd6d7z",
            "description": "Genomic positions 11,128,044 to 11,128,045 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
            "extensions": [
               {
                  "name": "GRCh38 1-based, inclusive interval",
                  "value": "chr1:11128044-11128045",
                  "description": "Genomic positions 11,128,044 to 11,128,045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
               },
               {
                  "name": "GRCh38 0-based, half-open interval",
                  "value": "chr1:11128043-11128045",
                  "description": "Genomic positions 11,128,043 to 11,128,045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
               }
            ],
            "type": "SequenceLocation",
            "digest": "5-SKfXZ941W7JbZW3UmQKtijyUfd6d7z",
            "sequenceReference": {
               "id": "refseq:NC_000001.11",
               "name": "NC_000001.11",
               "description": "Reference sequence for GRCh38 chromosome 1.",
               "aliases": [
                  "GRCh38:1",
                  "GRCh38:chr1",
                  "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"
               ],
               "type": "SequenceReference",
               "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
               "residueAlphabet": "na"
            },
            "start": 11128043,
            "end": 11128045,
            "sequence": "AT"
         },
         "state": {
            "type": "ReferenceLengthExpression",
            "length": 0,
            "sequence": "",
            "repeatSubunitLength": 2
         }
      },
      {
         "id": "ga4gh:VA.0TMQdMT2OBisJ9FI4tkzaBtxGB7r8FfJ",
         "type": "Allele",
         "name": "NM_004958.4:c.5992_5993del",
         "description": "VRS variation of NM_004958.4:c.5992_5993del, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
         "extensions": [
            {
               "name": "clinvar vcf",
               "value": "1-11128043-CAT-C"
            }
         ],
         "digest": "0TMQdMT2OBisJ9FI4tkzaBtxGB7r8FfJ",
         "expressions": [
            {
               "syntax": "spdi",
               "value": "NC_000001.11:11128043:AT:"
            },
            {
               "syntax": "hgvs.g",
               "value": "NC_000001.11:g.11128044_11128045del"
            },
            {
               "syntax": "hgvs.c",
               "value": "NM_004958.4:c.5992_5993del"
            },
            {
               "syntax": "hgvs.p",
               "value": "NP_004949.1:p.Met1998fs"
            }
         ],
         "location": {
            "id": "ga4gh:SL.gLe4d4Seuxn5fLCfO_2g34gF86vpGvo1",
            "description": "Coding DNA (cDNA) positions 5992 to 5993 of the MTOR MANE Select coding transcript (refseq:NM_004958.4), corresponding to genomic positions 11128044 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
            "extensions": [
               {
                  "name": "GRCh38 1-based, inclusive interval",
                  "value": "chr1:11128044-11128045",
                  "description": "Genomic positions 11128044 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
               },
               {
                  "name": "GRCh38 0-based, half-open interval",
                  "value": "chr1:11128043-11128045",
                  "description": "Genomic positions 11128043 to 11128045 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
               }
            ],
            "type": "SequenceLocation",
            "digest": "gLe4d4Seuxn5fLCfO_2g34gF86vpGvo1",
            "sequenceReference": {
               "id": "refseq:NM_004958.4",
               "name": "NM_004958.4",
               "description": "The MANE Select (GRCh38) coding transcript for MTOR.",
               "aliases": [
                  "ensembl:ENST00000361445.9",
                  "ga4gh:SQ.QheGYEnKbwNpM3LulbPTBQhyBSyZwuYm"
               ],
               "type": "SequenceReference",
               "refgetAccession": "SQ.QheGYEnKbwNpM3LulbPTBQhyBSyZwuYm",
               "residueAlphabet": "na"
            },
            "start": 6112,
            "end": 6114,
            "sequence": "AT"
         },
         "state": {
            "type": "ReferenceLengthExpression",
            "length": 0,
            "sequence": "",
            "repeatSubunitLength": 2
         }
      }
   ],
   "maturity": "trial use"
}
NC_000001.11:g.1699974C>G
{
   "$schema": "https://json-schema.org/draft/2020-12/schema",
   "$id": "https://w3id.org/ga4gh/schema/cat-vrs/1.0.0/json/example_canonicalAllele-ex2",
   "title": "example_canonicalAllele-ex2",
   "type": "CategoricalVariant",
   "id": "clingen:CA415424538",
   "name": "NC_000001.11:g.1699974C>G",
   "description": "An example canonical allele.",
   "aliases": [
      "NC_000001.11:g.1699974C>G",
      "NC_000001.10:g.1631413C>G"
   ],
   "extensions": [
      {
         "name": "cytogenetic location",
         "value": "1p36.33"
      },
      {
         "name": "hgvs list",
         "value": [
            {
               "nucleotideExpression": {
                  "syntax": "hgvs.g",
                  "value": "NC_000001.11:g.1699974C>G"
               }
            },
            {
               "nucleotideExpression": {
                  "syntax": "hgvs.g",
                  "value": "NC_000001.10:g.1631413C>G"
               }
            }
         ]
      }
   ],
   "constraints": [
      {
         "type": "DefiningAlleleConstraint",
         "allele": {
            "id": "ga4gh:VA.WjRY1EXgci6Nd0M2yI86Ue_OOnfnb-Ss",
            "type": "Allele",
            "name": "NC_000001.11:g.1699974C>G",
            "description": "VRS Allele of NC_000001.11:g.1699974C>G, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
            "digest": "WjRY1EXgci6Nd0M2yI86Ue_OOnfnb-Ss",
            "expressions": [
               {
                  "syntax": "hgvs.g",
                  "value": "NC_000001.11:g.1699974C>G"
               }
            ],
            "location": {
               "id": "ga4gh:SL.YmbtK6FLqlk2bLjpFwH-y7k4RLq_t7L3",
               "description": "Genomic position 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
               "extensions": [
                  {
                     "name": "GRCh38 1-based, genomic position",
                     "value": "chr1:1699974",
                     "description": "Genomic position 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38)."
                  },
                  {
                     "name": "GRCh38 1-based, inclusive interval notation",
                     "value": "chr1:1699974-1699974",
                     "description": "Genomic positions 1,699,974 to 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
                  },
                  {
                     "name": "GRCh38 0-based, half-open interval notation",
                     "value": "chr1:1699973-1699974",
                     "description": "Genomic positions 1,699,973 to 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
                  }
               ],
               "type": "SequenceLocation",
               "digest": "YmbtK6FLqlk2bLjpFwH-y7k4RLq_t7L3",
               "sequenceReference": {
                  "id": "refseq:NC_000001.11",
                  "name": "NC_000001.11",
                  "description": "Reference sequence for GRCh38 chromosome 1.",
                  "aliases": [
                     "GRCh38:1",
                     "GRCh38:chr1",
                     "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"
                  ],
                  "type": "SequenceReference",
                  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
                  "residueAlphabet": "na"
               },
               "start": 1699973,
               "end": 1699974,
               "sequence": "C"
            },
            "state": {
               "type": "LiteralSequenceExpression",
               "sequence": "G"
            }
         },
         "relations": [
            {
               "primaryCoding": {
                  "code": "liftover_to",
                  "system": "ga4gh-gks-term:allele-relation"
               }
            },
            {
               "primaryCoding": {
                  "code": "transcribed_to",
                  "system": "http://www.sequenceontology.org",
                  "iris": [
                     "http://www.sequenceontology.org/browser/current_release/term/transcribed_to"
                  ]
               }
            }
         ]
      }
   ],
   "mappings": [
      {
         "coding": {
            "system": "https://reg.clinicalgenome.org/",
            "code": "CA415424538",
            "iris": [
               "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=CA415424538"
            ]
         },
         "relation": "exactMatch"
      },
      {
         "coding": {
            "system": "https://www.ncbi.nlm.nih.gov/snp/",
            "code": "rs1391950675",
            "iris": [
               "https://www.ncbi.nlm.nih.gov/snp/rs1391950675"
            ]
         },
         "relation": "relatedMatch"
      },
      {
         "coding": {
            "system": "https://gnomad.broadinstitute.org",
            "code": "1-1631413-C-G",
            "iris": [
               "https://gnomad.broadinstitute.org/variant/1-1631413-C-G?dataset=gnomad_r2_1"
            ],
            "extensions": [
               {
                  "name": "dataset",
                  "value": "gnomad_r2_1"
               }
            ]
         },
         "relation": "relatedMatch"
      },
      {
         "coding": {
            "system": "https://gnomad.broadinstitute.org",
            "code": "1-1699974-C-G",
            "iris": [
               "https://gnomad.broadinstitute.org/variant/1-1699974-C-G?dataset=gnomad_r3"
            ],
            "extensions": [
               {
                  "name": "dataset",
                  "value": "gnomad_r3"
               }
            ]
         },
         "relation": "relatedMatch"
      },
      {
         "coding": {
            "system": "https://gnomad.broadinstitute.org",
            "code": "chr1-1699974-C-G",
            "iris": [
               "https://gnomad.broadinstitute.org/variant/chr1-1699974-C-G?dataset=gnomad_r4"
            ],
            "extensions": [
               {
                  "name": "dataset",
                  "value": "gnomad_r4"
               }
            ]
         },
         "relation": "relatedMatch"
      }
   ],
   "members": [
      {
         "id": "ga4gh:VA.WjRY1EXgci6Nd0M2yI86Ue_OOnfnb-Ss",
         "type": "Allele",
         "name": "NC_000001.11:g.1699974C>G",
         "description": "VRS variation of NC_000001.11:g.1699974C>G, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
         "digest": "WjRY1EXgci6Nd0M2yI86Ue_OOnfnb-Ss",
         "expressions": [
            {
               "syntax": "hgvs.g",
               "value": "NC_000001.11:g.1699974C>G"
            }
         ],
         "location": {
            "id": "ga4gh:SL.YmbtK6FLqlk2bLjpFwH-y7k4RLq_t7L3",
            "description": "Genomic position 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38).",
            "type": "SequenceLocation",
            "extensions": [
               {
                  "name": "GRCh38 1-based, genomic position",
                  "value": "chr1:1699974",
                  "description": "Genomic position 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38)."
               },
               {
                  "name": "GRCh38 1-based, inclusive interval notation",
                  "value": "chr1:1699974-1699974",
                  "description": "Genomic positions 1,699,974 to 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38); 1-based, inclusive interval notation."
               },
               {
                  "name": "GRCh38 0-based, half-open interval notation",
                  "value": "chr1:1699973-1699974",
                  "description": "Genomic positions 1,699,973 to 1,699,974 on chromosome 1 (refseq:NC_000001.11, GRCh38); 0-based, half-open interval notation."
               }
            ],
            "digest": "YmbtK6FLqlk2bLjpFwH-y7k4RLq_t7L3",
            "sequenceReference": {
               "id": "refseq:NC_000001.11",
               "name": "NC_000001.11",
               "description": "Reference sequence for GRCh38 chromosome 1.",
               "aliases": [
                  "GRCh38:1",
                  "GRCh38:chr1",
                  "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"
               ],
               "type": "SequenceReference",
               "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
               "residueAlphabet": "na"
            },
            "start": 1699973,
            "end": 1699974,
            "sequence": "C"
         },
         "state": {
            "type": "LiteralSequenceExpression",
            "sequence": "G"
         }
      },
      {
         "id": "ga4gh:VA.WW0r6Dh-z_ftn9yS-i0f1Y62JNCpVed2",
         "type": "Allele",
         "name": "NC_000001.10:g.1631413C>G",
         "description": "VRS variation of NC_000001.10:g.1631413C>G, generated with the VICC Variation Normalizer (https://github.com/cancervariants/variation-normalization).",
         "digest": "WW0r6Dh-z_ftn9yS-i0f1Y62JNCpVed2",
         "expressions": [
            {
               "syntax": "hgvs.g",
               "value": "NC_000001.10:g.1631413C>G"
            }
         ],
         "location": {
            "id": "ga4gh:SL.QsC4TgLsfR6-TatBj1sVKAgfv9bGYSxb",
            "description": "Genomic position 1,631,413 on chromosome 1 (refseq:NC_000001.10, GRCh37).",
            "type": "SequenceLocation",
            "extensions": [
               {
                  "name": "GRCh37 1-based, genomic position",
                  "value": "chr1:1631413",
                  "description": "Genomic position 1,631,413 on chromosome 1 (refseq:NC_000001.10, GRCh37)."
               },
               {
                  "name": "GRCh37 1-based, inclusive interval notation",
                  "value": "chr1:1631413-1631413",
                  "description": "Genomic positions 1,631,413 to 1,631,413 on chromosome 1 (refseq:NC_000001.10, GRCh37); 1-based, inclusive interval notation."
               },
               {
                  "name": "GRCh37 0-based, half-open interval notation",
                  "value": "chr1:1631412-1631413",
                  "description": "Genomic positions 1,631,412 to 1,631,413 on chromosome 1 (refseq:NC_000001.10, GRCh37); 0-based, half-open interval notation."
               }
            ],
            "digest": "QsC4TgLsfR6-TatBj1sVKAgfv9bGYSxb",
            "sequenceReference": {
               "id": "refseq:NC_000001.10",
               "name": "NC_000001.10",
               "description": "Reference sequence for GRCh37 chromosome 1.",
               "aliases": [
                  "GRCh37:1",
                  "GRCh37:chr1",
                  "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU"
               ],
               "type": "SequenceReference",
               "refgetAccession": "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU",
               "residueAlphabet": "na"
            },
            "start": 1631412,
            "end": 1631413,
            "sequence": "C"
         },
         "state": {
            "type": "LiteralSequenceExpression",
            "sequence": "G"
         }
      }
   ],
   "maturity": "trial use"
}

Implementation Guidance