Examples¶

Sheet Examples¶

Below is an example JSON file with a cancer sample sheet. Note that the sheet only contains one donor with two bio samples (normal sample N1 and primary tumor T1).

{
    "identifier": "file://examples/example_cancer_matched.tsv",
    "title": "Cancer Sample Sheet",
    "description": "Sample Sheet constructed from cancer matched samples compact TSV file",
    "extraInfoDefs": {
        "bioEntity": {
            "ncbiTaxon": {
                "docs": "Reference to NCBI taxonomy",
                "key": "taxon",
                "type": "string",
                "pattern": "^NCBITaxon_[1-9][0-9]*$"
            }
        },
        "bioSample": {
            "isTumor": {
                "docs": "Boolean flag for distinguishing tumor/normal samples",
                "key": "isTumor",
                "type": "boolean"
            }
        },
        "testSample": {
            "extractionType": {
                "docs": "Describes the extracted material",
                "key": "extractionType",
                "type": "enum",
                "choices": [
                    "DNA",
                    "RNA",
                    "other"
                ]
            }
        },
        "ngsLibrary": {
            "seqPlatform": {
                "docs": "Sequencing platform used",
                "key": "seqPlatform",
                "type": "enum",
                "choices": [
                    "Illumina",
                    "PacBio",
                    "other"
                ]
            },
            "libraryType": {
                "docs": "Rough classification of the library type",
                "key": "libraryType",
                "type": "enum",
                "choices": [
                    "Panel-seq",
                    "WES",
                    "WGS",
                    "mRNA-seq",
                    "tRNA-seq",
                    "other"
                ]
            },
            "folderName": {
                "docs": "Name of folder with FASTQ files",
                "key": "folderName",
                "type": "string"
            }
        }
    },
    "bioEntities": {
        "P001": {
            "pk": 1,
            "extraInfo": {
                "ncbiTaxon": "NCBITaxon_9606"
            },
            "bioSamples": {
                "N1": {
                    "pk": 2,
                    "extraInfo": {
                        "isTumor": false
                    },
                    "testSamples": {
                        "DNA1": {
                            "pk": 3,
                            "extraInfo": {
                                "extractionType": "DNA"
                            },
                            "ngsLibraries": {
                                "WES1": {
                                    "pk": 4,
                                    "extraInfo": {
                                        "seqPlatform": "Illumina",
                                        "folderName": "P001-N1-DNA1-WES1",
                                        "libraryType": "WES"
                                    }
                                }
                            }
                        }
                    }
                },
                "T1": {
                    "pk": 5,
                    "extraInfo": {
                        "isTumor": true
                    },
                    "testSamples": {
                        "DNA1": {
                            "pk": 6,
                            "extraInfo": {
                                "extractionType": "DNA"
                            },
                            "ngsLibraries": {
                                "WES1": {
                                    "pk": 7,
                                    "extraInfo": {
                                        "seqPlatform": "Illumina",
                                        "folderName": "P001-T1-DNA1-WES1",
                                        "libraryType": "WES"
                                    }
                                }
                            }
                        },
                        "RNA1": {
                            "pk": 8,
                            "extraInfo": {
                                "extractionType": "RNA"
                            },
                            "ngsLibraries": {
                                "mRNA_seq1": {
                                    "pk": 9,
                                    "extraInfo": {
                                        "seqPlatform": "Illumina",
                                        "folderName": "P001-T1-RNA1-mRNAseq1",
                                        "libraryType": "mRNA-seq"
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

Code Examples¶

The following Python program uses the biomedsheets module for loading the JSON sheet from above. It then prints the names of the donors and the names of the NGS libraries for the tumor/normal pairs.

# -*- coding: utf-8 -*-
"""Demonstrate shortcuts for cancer sample sheet
"""

import collections
import os

from biomedsheets import io, ref_resolver, shortcuts


def load_sheet():
    """Return ``Sheet`` instance for the cancer example.

    Reads ``example_cancer_matched.json`` from the directory that contains
    this script, passes the parsed JSON through ``RefResolver.resolve`` and
    hands the result to ``SheetBuilder`` to construct the sheet.
    """
    path = os.path.join(os.path.abspath(
        os.path.dirname(__file__)), 'example_cancer_matched.json')
    # Use a context manager so the file handle is closed deterministically
    # instead of being left open until garbage collection.
    with open(path, 'rt') as sheet_file:
        sheet_json = io.json_loads_ordered(sheet_file.read())
    resolver = ref_resolver.RefResolver(dict_class=collections.OrderedDict)
    return io.SheetBuilder(
        resolver.resolve('file://' + path, sheet_json)).run()


def main():
    """Print all donors, then the NGS libraries of each tumor/normal pair.

    The DNA library name is always printed for both the normal and the tumor
    sample; the RNA library name is printed only when the sample has one.
    """
    sheet = load_sheet()
    cancer_cases = shortcuts.CancerCaseSheet(sheet)

    print('Donors\n')
    for donor in cancer_cases.donors:
        print('  {}'.format(donor.name))

    print('\nLibraries of all tumor/normal pairs\n')
    for pair in cancer_cases.all_sample_pairs:
        print('  {}'.format(pair.donor.name))

        # Normal sample first, matching the order of the printed report.
        normal = pair.normal_sample
        print('    normal DNA: {}'.format(normal.dna_ngs_library.name))
        normal_rna = normal.rna_ngs_library
        if normal_rna:
            print('    normal RNA: {}'.format(normal_rna.name))

        tumor = pair.tumor_sample
        print('    tumor DNA:  {}'.format(tumor.dna_ngs_library.name))
        tumor_rna = tumor.rna_ngs_library
        if tumor_rna:
            print('    tumor RNA:  {}'.format(tumor_rna.name))


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Output¶

The output of the program is as follows:

Donors

  P001-000001

Libraries of all tumor/normal pairs

  P001-000001
    normal DNA: P001-N1-DNA1-WES1-000004
    tumor DNA:  P001-T1-DNA1-WES1-000007
    tumor RNA:  P001-T1-RNA1-mRNA_seq1-000009