Examples¶
Sheet Examples¶
Below is an example JSON file with a cancer sample sheet.
Note that the sheet only contains one donor with two bio samples (normal sample N1 and primary tumor T1).
{
"identifier": "file://examples/example_cancer_matched.tsv",
"title": "Cancer Sample Sheet",
"description": "Sample Sheet constructed from cancer matched samples compact TSV file",
"extraInfoDefs": {
"bioEntity": {
"ncbiTaxon": {
"docs": "Reference to NCBI taxonomy",
"key": "taxon",
"type": "string",
"pattern": "^NCBITaxon_[1-9][0-9]*$"
}
},
"bioSample": {
"isTumor": {
"docs": "Boolean flag for distinguishing tumor/normal samples",
"key": "isTumor",
"type": "boolean"
}
},
"testSample": {
"extractionType": {
"docs": "Describes the extracted material",
"key": "extractionType",
"type": "enum",
"choices": [
"DNA",
"RNA",
"other"
]
}
},
"ngsLibrary": {
"seqPlatform": {
"docs": "Sequencing platform used",
"key": "kitName",
"type": "enum",
"choices": [
"Illumina",
"PacBio",
"other"
]
},
"libraryType": {
"docs": "Rough classification of the library type",
"key": "libraryType",
"type": "enum",
"choices": [
"Panel-seq",
"WES",
"WGS",
"mRNA-seq",
"tRNA-seq",
"other"
]
},
"folderName": {
"docs": "Name of folder with FASTQ files",
"key": "folderName",
"type": "string"
}
}
},
"bioEntities": {
"P001": {
"pk": 1,
"extraInfo": {
"ncbiTaxon": "NCBITaxon_9606"
},
"bioSamples": {
"N1": {
"pk": 2,
"extraInfo": {
"isTumor": false
},
"testSamples": {
"DNA1": {
"pk": 3,
"extraInfo": {
"extractionType": "DNA"
},
"ngsLibraries": {
"WES1": {
"pk": 4,
"extraInfo": {
"seqPlatform": "Illumina",
"folderName": "P001-N1-DNA1-WES1",
"libraryType": "WES"
}
}
}
}
}
},
"T1": {
"pk": 5,
"extraInfo": {
"isTumor": true
},
"testSamples": {
"DNA1": {
"pk": 6,
"extraInfo": {
"extractionType": "DNA"
},
"ngsLibraries": {
"WES1": {
"pk": 7,
"extraInfo": {
"seqPlatform": "Illumina",
"folderName": "P001-T1-DNA1-WES1",
"libraryType": "WES"
}
}
}
},
"RNA1": {
"pk": 8,
"extraInfo": {
"extractionType": "RNA"
},
"ngsLibraries": {
"mRNA_seq1": {
"pk": 9,
"extraInfo": {
"seqPlatform": "Illumina",
"folderName": "P001-T1-RNA1-mRNAseq1",
"libraryType": "mRNA_seq"
}
}
}
}
}
}
}
}
}
}
Code Examples¶
The following Python program uses the biomedsheets module for loading the JSON sheet from above.
It then prints the names of the donors and the names of the NGS libraries for the tumor/normal pairs.
# -*- coding: utf-8 -*-
"""Demonstrate shortcuts for cancer sample sheet
"""
import collections
import os
from biomedsheets import io, ref_resolver, shortcuts
def load_sheet():
    """Return ``Sheet`` instance for the cancer example.

    Reads ``example_cancer_matched.json`` from the directory that contains
    this script, parses it with ``io.json_loads_ordered``, passes the parsed
    data through ``ref_resolver.RefResolver.resolve`` and finally builds the
    sheet with ``io.SheetBuilder``.

    :return: the object returned by ``io.SheetBuilder(...).run()``
    """
    path = os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        'example_cancer_matched.json')
    # Use a context manager so the file handle is closed deterministically
    # instead of being left open until garbage collection.
    with open(path, 'rt') as sheet_file:
        sheet_json = io.json_loads_ordered(sheet_file.read())
    resolver = ref_resolver.RefResolver(dict_class=collections.OrderedDict)
    # The sheet's own location is passed as a ``file://`` URL alongside the
    # parsed JSON (presumably the base for resolving references -- confirm
    # against the ``RefResolver`` documentation).
    return io.SheetBuilder(
        resolver.resolve('file://' + path, sheet_json)).run()
def main():
    """Print the donor names and the NGS libraries of every tumor/normal pair.

    Loads the example sheet via ``load_sheet()``, wraps it in
    ``shortcuts.CancerCaseSheet`` and writes two sections to standard
    output: the donors, then the DNA (and, when present, RNA) library names
    of the normal and tumor sample of each pair.
    """
    sheet = shortcuts.CancerCaseSheet(load_sheet())

    # First section: one line per donor.
    print('Donors\n')
    for entity in sheet.donors:
        print(' {}'.format(entity.name))

    # Second section: libraries grouped by tumor/normal pair.
    print('\nLibraries of all tumor/normal pairs\n')
    for sample_pair in sheet.all_sample_pairs:
        print(' {}'.format(sample_pair.donor.name))

        normal = sample_pair.normal_sample
        print(' normal DNA: {}'.format(normal.dna_ngs_library.name))
        normal_rna = normal.rna_ngs_library
        # The RNA line is only printed when the RNA library is truthy.
        if normal_rna:
            print(' normal RNA: {}'.format(normal_rna.name))

        tumor = sample_pair.tumor_sample
        print(' tumor DNA: {}'.format(tumor.dna_ngs_library.name))
        tumor_rna = tumor.rna_ngs_library
        if tumor_rna:
            print(' tumor RNA: {}'.format(tumor_rna.name))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Output¶
The output of the program is as follows:
Donors
P001-000001
Libraries of all tumor/normal pairs
P001-000001
normal DNA: P001-N1-DNA1-WES1-000004
tumor DNA: P001-T1-DNA1-WES1-000007
tumor RNA: P001-T1-RNA1-mRNA_seq1-000009