Everyone should probably learn to code.
This class was a ton of fun. Although I didn't always agree with the incredibly picky grading of syntax, I found something cathartic in coding. My weekly routine for working on assignments for this class was sitting down one night when I didn't have anything planned, queuing up 2 or 3 of my favorite albums, and then coding until I was done with the project, which could take anywhere from 2 to 5 hours. Regardless of whatever grade I got in the end, each project ended with a sense of accomplishment and something tangible that I was proud of.
I think I'll try picking up R or Python next!
I think I'll try picking up R or Python next!
One of my favorite projects: writing a program that reads files containing DNA nucleotide sequences and reports different statistics about them.
1 // Wylie Kau
2 // ID#: 1631185
3 // CSE 142B
4 // TA: Aaron Su
5 // Assignment 6
6
7 // This program reads a file containing a number of nucleotide sequences. After analyzing the
8 // sequence, it tells the user the name of the sequence, the nucleotides in the sequence,
9 // the number of each unique nucleotide in the molecule, the mass percent of each unique
10 // nucleotide, a list of the codons making up the sequence, and finally checks to see if
11 // the nucleotide sequence constitutes a protein.
12
13 import java.util.*;
14 import java.io.*;
15
// Reads named nucleotide sequences from an input file (a name line followed by a sequence line)
// and writes a report for each one to an output file: the nucleotide counts, the mass percentage
// of each nucleotide, the list of codons, and whether the sequence qualifies as a protein.
public class DNA {
    public static final int MIN_CODONS = 5; // Minimum number of codons for a protein.
    public static final double GC_PERCENT = 30.0; // Minimum combined mass percentage for G and C.
    public static final int[] NUS = {'A', 'C', 'G', 'T'}; // The unique nucleotides.
    public static final int UNIQUE_NUS = NUS.length; // Number of unique nucleotides.
    public static final int NUS_PER_CODON = 3; // Number of nucleotides per codon.
    // Molecular weights for each unique nucleotide, in the same order as NUS (A, C, G, T).
    public static final double[] MOL_WEIGHTS = {135.128, 111.103, 151.128, 125.107};
    public static final double JUNK_WEIGHT = 100.00; // Molecular weight of each junk ('-') entry.
    public static final String START_CODON = "ATG"; // Only valid start codon.
    public static final String[] STOP_CODONS = {"TAA", "TAG", "TGA"}; // Only valid stop codons.

    // Prompts for the input and output file names, then processes the input two lines at a time
    // (region name, then nucleotide sequence) and writes one report per region.
    // Throws FileNotFoundException if the input file cannot be opened or the output file cannot
    // be created.
    public static void main(String[] args) throws FileNotFoundException {
        Scanner console = new Scanner(System.in);
        String[] files = intro(console);
        // try-with-resources closes both files even if processing fails part-way through.
        // The input is opened first so a missing input file does not create an empty output file.
        try (Scanner dataFile = new Scanner(new File(files[0]));
                PrintStream outFile = new PrintStream(new File(files[1]))) {
            while (dataFile.hasNextLine()) {
                String protName = dataFile.nextLine();
                if (!dataFile.hasNextLine()) {
                    // A trailing name (or blank) line with no sequence after it: nothing to
                    // report, and calling nextLine() here would throw NoSuchElementException.
                    break;
                }
                // Locale.ROOT keeps the upper-casing independent of the machine's default locale.
                String protein = dataFile.nextLine().toUpperCase(Locale.ROOT);
                int[] nucCount = getNucCount(protein);
                double totalMass = getTotalMass(protein);
                double[] massPercent = getMassPercent(nucCount, totalMass);
                String[] codonList = getCodons(protein);
                String isProtein = proteinCheck(codonList, massPercent);
                output(outFile, protName, protein, nucCount, massPercent, totalMass, codonList,
                        isProtein);
            }
        }
    }

    // Prints the program introduction, then reads the input file name and the output file name
    // as single whitespace-delimited tokens from the given console scanner.
    // Returns a two-element array: {input file name, output file name}.
    public static String[] intro(Scanner console) {
        System.out.println("This program reports information about DNA");
        System.out.println("nucleotide sequences that may encode proteins.");
        System.out.print("Input file name? ");
        String inName = console.next();
        System.out.print("Output file name? ");
        String outName = console.next();
        return new String[] {inName, outName};
    }

    // Returns the index of the given character in NUS, or -1 if it is not one of the nucleotides.
    private static int nucleotideIndex(char nuAcid) {
        for (int j = 0; j < UNIQUE_NUS; j++) {
            if (nuAcid == NUS[j]) {
                return j;
            }
        }
        return -1;
    }

    // Counts how many times each unique nucleotide occurs in the given sequence.
    // Characters that are not in NUS (including the junk marker '-') are not counted.
    // Returns an array of counts in the same order as NUS.
    public static int[] getNucCount(String protein) {
        int[] nucCount = new int[UNIQUE_NUS];
        for (int i = 0; i < protein.length(); i++) {
            int index = nucleotideIndex(protein.charAt(i));
            if (index >= 0) {
                nucCount[index]++;
            }
        }
        return nucCount;
    }

    // Computes the total mass of the given sequence by adding the molecular weight of every
    // nucleotide plus JUNK_WEIGHT for every '-' character. Any other character adds nothing.
    // Returns the cumulative mass.
    public static double getTotalMass(String protein) {
        double totalMass = 0.0;
        for (int i = 0; i < protein.length(); i++) {
            char nuAcid = protein.charAt(i);
            int index = nucleotideIndex(nuAcid);
            if (index >= 0) {
                totalMass += MOL_WEIGHTS[index];
            } else if (nuAcid == '-') {
                totalMass += JUNK_WEIGHT;
            }
        }
        return totalMass;
    }

    // Computes the percentage of the total mass contributed by each unique nucleotide, rounded
    // to one decimal place.
    // nucCount  - count of each nucleotide, in NUS order.
    // totalMass - mass of the whole sequence, junk included.
    // Returns an array of percentages in NUS order. When totalMass is 0 the quotient is NaN,
    // which Math.round maps to 0, so every percentage is reported as 0.0.
    public static double[] getMassPercent(int[] nucCount, double totalMass) {
        double[] massPercents = new double[UNIQUE_NUS];
        for (int i = 0; i < UNIQUE_NUS; i++) {
            // Scale the fraction by 1000, round, then divide by 10 to keep one decimal place.
            massPercents[i] = Math.round(((MOL_WEIGHTS[i] * nucCount[i]) / totalMass) * 1000);
            massPercents[i] /= 10;
        }
        return massPercents;
    }

    // Removes every junk marker ('-') from the given sequence and splits what remains into
    // consecutive codons of NUS_PER_CODON characters. Leftover characters that do not fill a
    // complete codon are dropped.
    // Returns the codons in sequence order (an empty array if there is no complete codon).
    public static String[] getCodons(String protein) {
        protein = protein.replace("-", "");
        int numCodons = protein.length() / NUS_PER_CODON;
        String[] codons = new String[numCodons];
        for (int i = 0; i < numCodons; i++) {
            int start = i * NUS_PER_CODON;
            codons[i] = protein.substring(start, start + NUS_PER_CODON);
        }
        return codons;
    }

    // Decides whether a sequence qualifies as a protein. It does when it has at least MIN_CODONS
    // codons, the combined mass percentage of C and G is at least GC_PERCENT, the first codon is
    // START_CODON, and the last codon is one of STOP_CODONS.
    // codonList   - the codons of the sequence, in order.
    // massPercent - mass percentages in NUS order (index 1 is C, index 2 is G).
    // Returns "YES" if every test passes, otherwise "NO".
    public static String proteinCheck(String[] codonList, double[] massPercent) {
        // Check the length before touching any element: an empty codon list (a sequence with
        // fewer than NUS_PER_CODON nucleotides) has no first or last codon to read.
        if (codonList.length == 0 || codonList.length < MIN_CODONS) {
            return "NO";
        }
        double cPlusG = massPercent[1] + massPercent[2];
        if (cPlusG < GC_PERCENT) {
            return "NO";
        }
        if (!codonList[0].equals(START_CODON)) {
            return "NO";
        }
        String stopCodon = codonList[codonList.length - 1];
        for (int i = 0; i < STOP_CODONS.length; i++) {
            if (stopCodon.equals(STOP_CODONS[i])) {
                return "YES";
            }
        }
        return "NO";
    }

    // Writes the report for one region to the given print stream, followed by a blank line.
    // outFile     - destination of the report.
    // protName    - name of the region.
    // protein     - the nucleotide sequence as read (upper-cased, junk included).
    // nucCount    - count of each nucleotide, in NUS order.
    // massPercent - mass percentage of each nucleotide, in NUS order.
    // totalMass   - total mass of the sequence, printed with one decimal place.
    // codonList   - the codons of the sequence.
    // isProtein   - "YES" or "NO", as produced by proteinCheck.
    public static void output(PrintStream outFile, String protName, String protein, int[] nucCount,
            double[] massPercent, double totalMass, String[] codonList, String isProtein) {
        outFile.println("Region Name: " + protName);
        outFile.println("Nucleotides: " + protein);
        outFile.println("Nuc. Counts: " + Arrays.toString(nucCount));
        outFile.print("Total Mass%: " + Arrays.toString(massPercent));
        outFile.printf(" of %.1f", totalMass);
        outFile.println();
        outFile.println("Codons List: " + Arrays.toString(codonList));
        outFile.println("Is Protein?: " + isProtein);
        outFile.println();
    }
}
2 // ID#: 1631185
3 // CSE 142B
4 // TA: Aaron Su
5 // Assignment 6
6
7 // This program reads a file containing a number of nucleotide sequences. After analyzing the
8 // sequence, it tells the user the name of the sequence, the nucleotides in the sequence,
9 // the number of each unique nucleotide in the molecule, the mass percent of each unique
10 // nucleotide, a list of the codons making up the sequence, and finally checks to see if
11 // the nucleotide sequence constitutes a protein.
12
13 import java.util.*;
14 import java.io.*;
15
// Reads named nucleotide sequences from an input file (a name line followed by a sequence line)
// and writes a report for each one to an output file: the nucleotide counts, the mass percentage
// of each nucleotide, the list of codons, and whether the sequence qualifies as a protein.
public class DNA {
    public static final int MIN_CODONS = 5; // Minimum number of codons for a protein.
    public static final double GC_PERCENT = 30.0; // Minimum combined mass percentage for G and C.
    public static final int[] NUS = {'A', 'C', 'G', 'T'}; // The unique nucleotides.
    public static final int UNIQUE_NUS = NUS.length; // Number of unique nucleotides.
    public static final int NUS_PER_CODON = 3; // Number of nucleotides per codon.
    // Molecular weights for each unique nucleotide, in the same order as NUS (A, C, G, T).
    public static final double[] MOL_WEIGHTS = {135.128, 111.103, 151.128, 125.107};
    public static final double JUNK_WEIGHT = 100.00; // Molecular weight of each junk ('-') entry.
    public static final String START_CODON = "ATG"; // Only valid start codon.
    public static final String[] STOP_CODONS = {"TAA", "TAG", "TGA"}; // Only valid stop codons.

    // Prompts for the input and output file names, then processes the input two lines at a time
    // (region name, then nucleotide sequence) and writes one report per region.
    // Throws FileNotFoundException if the input file cannot be opened or the output file cannot
    // be created.
    public static void main(String[] args) throws FileNotFoundException {
        Scanner console = new Scanner(System.in);
        String[] files = intro(console);
        // try-with-resources closes both files even if processing fails part-way through.
        // The input is opened first so a missing input file does not create an empty output file.
        try (Scanner dataFile = new Scanner(new File(files[0]));
                PrintStream outFile = new PrintStream(new File(files[1]))) {
            while (dataFile.hasNextLine()) {
                String protName = dataFile.nextLine();
                if (!dataFile.hasNextLine()) {
                    // A trailing name (or blank) line with no sequence after it: nothing to
                    // report, and calling nextLine() here would throw NoSuchElementException.
                    break;
                }
                // Locale.ROOT keeps the upper-casing independent of the machine's default locale.
                String protein = dataFile.nextLine().toUpperCase(Locale.ROOT);
                int[] nucCount = getNucCount(protein);
                double totalMass = getTotalMass(protein);
                double[] massPercent = getMassPercent(nucCount, totalMass);
                String[] codonList = getCodons(protein);
                String isProtein = proteinCheck(codonList, massPercent);
                output(outFile, protName, protein, nucCount, massPercent, totalMass, codonList,
                        isProtein);
            }
        }
    }

    // Prints the program introduction, then reads the input file name and the output file name
    // as single whitespace-delimited tokens from the given console scanner.
    // Returns a two-element array: {input file name, output file name}.
    public static String[] intro(Scanner console) {
        System.out.println("This program reports information about DNA");
        System.out.println("nucleotide sequences that may encode proteins.");
        System.out.print("Input file name? ");
        String inName = console.next();
        System.out.print("Output file name? ");
        String outName = console.next();
        return new String[] {inName, outName};
    }

    // Returns the index of the given character in NUS, or -1 if it is not one of the nucleotides.
    private static int nucleotideIndex(char nuAcid) {
        for (int j = 0; j < UNIQUE_NUS; j++) {
            if (nuAcid == NUS[j]) {
                return j;
            }
        }
        return -1;
    }

    // Counts how many times each unique nucleotide occurs in the given sequence.
    // Characters that are not in NUS (including the junk marker '-') are not counted.
    // Returns an array of counts in the same order as NUS.
    public static int[] getNucCount(String protein) {
        int[] nucCount = new int[UNIQUE_NUS];
        for (int i = 0; i < protein.length(); i++) {
            int index = nucleotideIndex(protein.charAt(i));
            if (index >= 0) {
                nucCount[index]++;
            }
        }
        return nucCount;
    }

    // Computes the total mass of the given sequence by adding the molecular weight of every
    // nucleotide plus JUNK_WEIGHT for every '-' character. Any other character adds nothing.
    // Returns the cumulative mass.
    public static double getTotalMass(String protein) {
        double totalMass = 0.0;
        for (int i = 0; i < protein.length(); i++) {
            char nuAcid = protein.charAt(i);
            int index = nucleotideIndex(nuAcid);
            if (index >= 0) {
                totalMass += MOL_WEIGHTS[index];
            } else if (nuAcid == '-') {
                totalMass += JUNK_WEIGHT;
            }
        }
        return totalMass;
    }

    // Computes the percentage of the total mass contributed by each unique nucleotide, rounded
    // to one decimal place.
    // nucCount  - count of each nucleotide, in NUS order.
    // totalMass - mass of the whole sequence, junk included.
    // Returns an array of percentages in NUS order. When totalMass is 0 the quotient is NaN,
    // which Math.round maps to 0, so every percentage is reported as 0.0.
    public static double[] getMassPercent(int[] nucCount, double totalMass) {
        double[] massPercents = new double[UNIQUE_NUS];
        for (int i = 0; i < UNIQUE_NUS; i++) {
            // Scale the fraction by 1000, round, then divide by 10 to keep one decimal place.
            massPercents[i] = Math.round(((MOL_WEIGHTS[i] * nucCount[i]) / totalMass) * 1000);
            massPercents[i] /= 10;
        }
        return massPercents;
    }

    // Removes every junk marker ('-') from the given sequence and splits what remains into
    // consecutive codons of NUS_PER_CODON characters. Leftover characters that do not fill a
    // complete codon are dropped.
    // Returns the codons in sequence order (an empty array if there is no complete codon).
    public static String[] getCodons(String protein) {
        protein = protein.replace("-", "");
        int numCodons = protein.length() / NUS_PER_CODON;
        String[] codons = new String[numCodons];
        for (int i = 0; i < numCodons; i++) {
            int start = i * NUS_PER_CODON;
            codons[i] = protein.substring(start, start + NUS_PER_CODON);
        }
        return codons;
    }

    // Decides whether a sequence qualifies as a protein. It does when it has at least MIN_CODONS
    // codons, the combined mass percentage of C and G is at least GC_PERCENT, the first codon is
    // START_CODON, and the last codon is one of STOP_CODONS.
    // codonList   - the codons of the sequence, in order.
    // massPercent - mass percentages in NUS order (index 1 is C, index 2 is G).
    // Returns "YES" if every test passes, otherwise "NO".
    public static String proteinCheck(String[] codonList, double[] massPercent) {
        // Check the length before touching any element: an empty codon list (a sequence with
        // fewer than NUS_PER_CODON nucleotides) has no first or last codon to read.
        if (codonList.length == 0 || codonList.length < MIN_CODONS) {
            return "NO";
        }
        double cPlusG = massPercent[1] + massPercent[2];
        if (cPlusG < GC_PERCENT) {
            return "NO";
        }
        if (!codonList[0].equals(START_CODON)) {
            return "NO";
        }
        String stopCodon = codonList[codonList.length - 1];
        for (int i = 0; i < STOP_CODONS.length; i++) {
            if (stopCodon.equals(STOP_CODONS[i])) {
                return "YES";
            }
        }
        return "NO";
    }

    // Writes the report for one region to the given print stream, followed by a blank line.
    // outFile     - destination of the report.
    // protName    - name of the region.
    // protein     - the nucleotide sequence as read (upper-cased, junk included).
    // nucCount    - count of each nucleotide, in NUS order.
    // massPercent - mass percentage of each nucleotide, in NUS order.
    // totalMass   - total mass of the sequence, printed with one decimal place.
    // codonList   - the codons of the sequence.
    // isProtein   - "YES" or "NO", as produced by proteinCheck.
    public static void output(PrintStream outFile, String protName, String protein, int[] nucCount,
            double[] massPercent, double totalMass, String[] codonList, String isProtein) {
        outFile.println("Region Name: " + protName);
        outFile.println("Nucleotides: " + protein);
        outFile.println("Nuc. Counts: " + Arrays.toString(nucCount));
        outFile.print("Total Mass%: " + Arrays.toString(massPercent));
        outFile.printf(" of %.1f", totalMass);
        outFile.println();
        outFile.println("Codons List: " + Arrays.toString(codonList));
        outFile.println("Is Protein?: " + isProtein);
        outFile.println();
    }
}