Home Install User Guide Python Notebook API FAQ

How to run Cell Ranger from Notebook

runcellranger

If you have installed Cell Ranger, you can use it to process FASTQ files from a Python notebook.

Function to run Cell Ranger

In [ ]:
import gzip
import os
import shlex
import shutil
import subprocess
from subprocess import PIPE, STDOUT, Popen
def runCellRange(workspace,fastq_path,samples,expect_cells,transcriptpath,run_name):
    """Run ``cellranger count`` per sample and, for several samples, ``cellranger aggr``.

    A bash script and (for more than one sample) an aggregation CSV are written
    to ``workspace/run_name`` and the script is executed synchronously with
    ``bash``; its combined stdout/stderr is printed once it finishes.

    Args:
        workspace: Existing directory in which the run directory is created.
        fastq_path: Existing directory passed to ``--fastqs``.
        samples: Sample name(s). A list/tuple of names; a single non-empty
            string is treated as one sample rather than iterated per character.
        expect_cells: Value passed to ``--expect-cells``.
        transcriptpath: Value passed to ``--transcriptome``.
        run_name: Name of the run directory; spaces are replaced by ``_``.

    Returns:
        The path ``<run dir>/<result>/outs/filtered_feature_bc_matrix/`` where
        ``<result>`` is ``aggr`` for several samples or the sample name for one.
        Returns ``""`` when ``cellranger`` is not on PATH, and ``None`` when an
        argument fails validation (a message is printed in both cases).
    """
    run_name = run_name.replace(" ", "_")

    # Look the executable up on PATH. Grepping the shell's error text for
    # "command not found" is unreliable: /bin/sh is often dash, which prints
    # "not found" instead.
    if shutil.which("cellranger") is None:
        print("Please install cellranger 3.0")
        print("Tutorial: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/installation")
        print("If you have counts data. please skip this step.")
        return ""
    if not os.path.isdir(workspace):
        print("workspace is not a dir")
        return

    # Normalise samples to a list of strings; a bare string would otherwise be
    # iterated character by character.
    if samples is None:
        samples = []
    elif isinstance(samples, str):
        samples = [samples] if samples else []
    else:
        samples = [str(name) for name in samples]
    if len(samples) == 0:
        print("Please input samples")
        return
    if not os.path.isdir(fastq_path):
        print("fastq path is not a dir")
        return
    res_path = workspace + "/" + run_name
    if os.path.isdir(res_path):
        print("run name is already in workspace")
        return
    os.mkdir(res_path)

    # Every user-supplied value is shell-quoted so that paths containing
    # spaces or shell metacharacters reach cellranger as a single argument.
    script_lines = [
        "wd=" + shlex.quote(res_path),
        'cd "${wd}"',
    ]
    for name in samples:
        script_lines.append(" ".join([
            "cellranger count",
            "--id " + shlex.quote(name),
            "--fastqs " + shlex.quote(fastq_path),
            "--transcriptome " + shlex.quote(transcriptpath),
            "--expect-cells " + shlex.quote(str(expect_cells)),
            "--sample=" + shlex.quote(name),
        ]))

    if len(samples) > 1:
        # One CSV row per sample; the rows are joined only after all of them
        # have been collected.
        csv_rows = ["library_id,molecule_h5"]
        for name in samples:
            csv_rows.append(name + "," + res_path + "/" + name + "/outs/molecule_info.h5")

        csvf = res_path + "/" + run_name + ".csv"
        with open(csvf, "w") as f:
            f.write("\n".join(csv_rows) + "\n")

        script_lines.append(
            "cellranger aggr --id aggr --csv=" + shlex.quote(csvf) + " --normalize=mapped"
        )
        resultfile = "aggr"
    else:
        resultfile = samples[0]

    shfile = res_path + "/" + run_name + ".sh"
    with open(shfile, "w") as f:
        f.write("\n".join(script_lines) + "\n")

    command = "bash " + shlex.quote(shfile)

    print("it will take a few hours . please wait.....")
    # communicate() drains the output, closes the pipes and waits for the
    # child, so no process or file descriptor is left behind.
    with subprocess.Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT) as prun:
        output, _ = prun.communicate()
    print(output.decode(errors="replace"))
    if prun.returncode != 0:
        print("warning: script exited with status " + str(prun.returncode))
    print("---------------------------------------------------------------------------------")
    print("finish")
    resultpath = res_path + "/" + resultfile
    print("results path: " + resultpath)
    CountsFile = resultpath + "/outs/filtered_feature_bc_matrix/"

    print("counts file path: " + CountsFile)

    return CountsFile
    

Run Cell Ranger

In [ ]:
# Example invocation through the packaged pipeline object.
# The path and sample values below are placeholders to replace before running.
import scpipeline
p=scpipeline.ProcessPipline();
p.runCellRange(
    workspace="/path/to/workspace",#absolute address.
    fastq_path="/path/to/fastqfile",
    samples="", # NOTE(review): placeholder; presumably a list of sample names - confirm against scpipeline
    expect_cells=4000, # cell ranger requirement
    transcriptpath="path to transcriptpath",
    run_name="test_run", #result file will save to workspace+"/"+run_name
)
In [ ]: