Aritalab:Lecture/Programming/Java/Alignment
From Metabolomics.JP
< Aritalab:Lecture | Programming | Java(Difference between revisions)
(Created page with "配列アライメントアルゴリズムの全体を以下に示します。このページにあるコードを全てつなげて Alignment.java としてコンパイル、実...") |
m |
||
| (8 intermediate revisions by one user not shown) | |||
| Line 1: | Line 1: | ||
| − | + | 配列アライメントアルゴリズムの全体を以下に示します。このページにあるコード部分を全てつなげて Alignment.java としてコンパイル、実行してください。 | |
| + | |||
| + | このファイルだけで動きます。 | ||
| + | |||
__FORCETOC__ | __FORCETOC__ | ||
| + | |||
===クラスおよびペナルティの定義=== | ===クラスおよびペナルティの定義=== | ||
<pre> | <pre> | ||
public class Alignment { | public class Alignment { | ||
| − | + | private int LOCAL = 0, GLOBAL = 1; | |
| − | + | private int AlignType = -1; | |
| − | + | private int MatchScore = 1; | |
| − | + | private int GapPenalty = -2; | |
| − | + | private int MismatchPenalty = -1; | |
| − | + | private int[][] scoreTable; | |
| − | + | private String seqX, seqY; | |
| − | + | public Alignment(String x, String y) { | |
| − | + | seqX = x; | |
| − | + | seqY = y; | |
| − | + | scoreTable = new int[x.length() + 1][y.length() + 1]; | |
| − | + | } | |
| − | + | private void initializeTable() { | |
| − | + | scoreTable[0][0] = 0; | |
| − | + | for (int i = 1; i < scoreTable.length; i++) | |
| − | + | scoreTable[i][0] = scoreTable[i - 1][0] + AlignType * GapPenalty; | |
| − | + | for (int i = 1; i < scoreTable[0].length; i++) | |
| − | + | scoreTable[0][i] = scoreTable[0][i - 1] + AlignType * GapPenalty; | |
| − | + | } | |
| − | + | private int getMax(int x, int y, int z, int w) { | |
| − | + | int t = x > y ? x : y; | |
| − | + | int s = z > w ? z : w; | |
| − | + | return (t > s ? t : s); | |
| − | + | } | |
| − | + | private int getMax(int x, int y, int z) { | |
| − | + | int t = x > y ? x : y; | |
| − | + | return (t > z ? t : z); | |
| − | + | } | |
</pre> | </pre> | ||
| + | |||
===トレースバック関数=== | ===トレースバック関数=== | ||
| − | 重要なのは do { ... } while 文の中身だけで、後はトレースを印刷するための準備です。 | + | 重要なのは traceBack関数の do { ... } while 文の中身だけで、後はトレースを印刷するための準備です。 |
<pre> | <pre> | ||
| − | + | private String[] traceBack() { | |
| − | + | int x = 0, y = 0; | |
| − | + | if (AlignType == LOCAL) { | |
| − | + | // Search for the maximum score brute-force (very inefficient). | |
| − | + | // The max score can be recorded when computing scoreTable. | |
| − | + | int max = 0; | |
| − | + | for (int i = 0; i < scoreTable.length; i++) { | |
| − | + | for (int j = 0; j < scoreTable[0].length; j++) | |
| − | + | if (scoreTable[i][j] >= max) { | |
| − | + | x = i; | |
| − | + | y = j; | |
| − | + | max = scoreTable[i][j]; | |
| − | + | } | |
| − | + | } | |
| − | + | } else if (AlignType == GLOBAL) { | |
| − | + | x = scoreTable.length - 1; | |
| − | + | y = scoreTable[0].length - 1; | |
| − | + | } | |
| − | + | StringBuffer line1 = new StringBuffer(); | |
| − | + | StringBuffer line2 = new StringBuffer(); | |
| − | + | line1.append(':'); | |
| − | + | line1.append(x); | |
| − | + | line2.append(':'); | |
| − | + | line2.append(y); | |
| − | + | do { | |
| − | + | if (scoreTable[x - 1][y] + GapPenalty == scoreTable[x][y]) { | |
| − | + | line1.insert(0, seqX.charAt(x-- - 1)); | |
| − | + | line2.insert(0, '-'); | |
| − | + | } else if (scoreTable[x][y - 1] + GapPenalty == scoreTable[x][y]) { | |
| − | + | line1.insert(0, '-'); | |
| − | + | line2.insert(0, seqY.charAt(y-- - 1)); | |
| − | + | } else { | |
| − | + | line1.insert(0, seqX.charAt(x-- - 1)); | |
| − | + | line2.insert(0, seqY.charAt(y-- - 1)); | |
| − | + | } | |
| − | + | } while (x != 0 && y != 0 | |
| − | + | && (AlignType == GLOBAL ? true : scoreTable[x][y] != 0)); | |
| − | + | line1.insert(0, ':'); | |
| − | + | line1.insert(0, String.format("%1$3d", x + 1)); | |
| − | + | line2.insert(0, ':'); | |
| − | + | line2.insert(0, String.format("%1$3d", y + 1)); | |
| − | + | return new String[] { line1.toString(), line2.toString() }; | |
| − | + | } | |
| − | + | public void printTrace() { | |
| − | + | String[] trace = traceBack(); | |
| − | + | String x = trace[0]; | |
| − | + | String y = trace[1]; | |
| − | + | StringBuffer sb = new StringBuffer(); | |
| − | + | sb.append(" "); | |
| − | + | for (int i = 4; x.charAt(i) != ':'; i++) | |
| − | + | sb.append(x.charAt(i) == y.charAt(i) ? "|" : " "); | |
| − | + | sb.append(" "); | |
| − | + | String z = sb.toString(); | |
| − | + | ||
| − | + | int width = 50; | |
| − | + | int i = -1; | |
| − | + | while (++i * width < x.length()) { | |
| − | + | System.out.println(x.substring(i * width, | |
| − | + | Math.min(x.length(), (i + 1) * width))); | |
| − | + | System.out.println(z.substring(i * width, | |
| − | + | Math.min(z.length(), (i + 1) * width))); | |
| − | + | System.out.println(y.substring(i * width, | |
| − | + | Math.min(y.length(), (i + 1) * width))); | |
| − | + | } | |
| − | + | } | |
| − | + | ||
| − | + | ||
| − | + | ||
| − | + | ||
| − | + | ||
| − | + | ||
| − | + | ||
| − | + | ||
| − | + | ||
| − | + | ||
| − | + | ||
</pre> | </pre> | ||
===アルゴリズム=== | ===アルゴリズム=== | ||
| − | + | getMax関数で最大値を取る漸化式を for ループで二重に回しているだけです。大域アライメントと局所アライメントの違いは、漸化式の候補に 0 を加えるだけです。 | |
<pre> | <pre> | ||
| − | + | public void NeedlemanWunsch() { | |
| − | + | AlignType = GLOBAL; | |
| − | + | initializeTable(); | |
| − | + | for (int i = 1; i < scoreTable.length; i++) | |
| − | + | for (int j = 1; j < scoreTable[0].length; j++) | |
| − | + | scoreTable[i][j] = getMax( | |
| − | + | scoreTable[i - 1][j] + GapPenalty, | |
| − | + | scoreTable[i][j - 1] + GapPenalty, | |
| − | + | scoreTable[i - 1][j - 1] | |
| − | + | + (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore | |
| − | + | : MismatchPenalty)); | |
| − | + | } | |
| − | + | public void SmithWaterman() { | |
| − | + | AlignType = LOCAL; | |
| − | + | initializeTable(); | |
| − | + | for (int i = 1; i < scoreTable.length; i++) | |
| − | + | for (int j = 1; j < scoreTable[0].length; j++) | |
| − | + | scoreTable[i][j] = getMax( | |
| − | + | 0, | |
| − | + | scoreTable[i - 1][j] + GapPenalty, | |
| − | + | scoreTable[i][j - 1] + GapPenalty, | |
| − | + | scoreTable[i - 1][j - 1] | |
| − | + | + (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore | |
| − | + | : MismatchPenalty)); | |
| + | } | ||
</pre> | </pre> | ||
| + | |||
===メイン関数=== | ===メイン関数=== | ||
<pre> | <pre> | ||
| − | + | public void printTable() { | |
| − | + | if (scoreTable == null) | |
| − | + | return; | |
| − | + | StringBuffer sb = new StringBuffer(); | |
| − | + | for (int i = 0; i < scoreTable.length; i++) { | |
| − | + | for (int j = 0; j < scoreTable[0].length; j++) | |
| − | + | sb.append(String.format("%1$4d", scoreTable[i][j])); | |
| − | + | sb.append("\n"); | |
| + | } | ||
| + | System.out.println(sb.toString()); | ||
| + | } | ||
| + | |||
| + | public static void main(String[] args) { | ||
| + | Alignment A = new Alignment("aattgaagg", "gctagg"); | ||
| + | A.NeedlemanWunsch(); | ||
| + | //A.SmithWaterman(); | ||
| + | A.printTable(); | ||
| + | A.printTrace(); | ||
| + | } | ||
} | } | ||
| + | </pre> | ||
| + | |||
| + | ===実行結果=== | ||
| + | <pre> | ||
| + | > java Alignment | ||
| + | |||
| + | 0 -2 -4 -6 -8 -10 -12 | ||
| + | -2 -1 -3 -5 -5 -7 -9 | ||
| + | -4 -3 -2 -4 -4 -6 -8 | ||
| + | -6 -5 -4 -1 -3 -5 -7 | ||
| + | -8 -7 -6 -3 -2 -4 -6 | ||
| + | -10 -7 -8 -5 -4 -1 -3 | ||
| + | -12 -9 -8 -7 -4 -3 -2 | ||
| + | -14 -11 -10 -9 -6 -5 -4 | ||
| + | -16 -13 -12 -11 -8 -5 -4 | ||
| + | -18 -15 -14 -13 -10 -7 -4 | ||
| + | |||
| + | 1:aattgaagg:9 | ||
| + | | | || | ||
| + | 1:gct--a-gg:6 | ||
</pre> | </pre> | ||
Latest revision as of 14:46, 20 December 2011
配列アライメントアルゴリズムの全体を以下に示します。このページにあるコード部分を全てつなげて Alignment.java としてコンパイル、実行してください。
このファイルだけで動きます。
Contents |
[edit] クラスおよびペナルティの定義
public class Alignment {
private int LOCAL = 0, GLOBAL = 1;
private int AlignType = -1;
private int MatchScore = 1;
private int GapPenalty = -2;
private int MismatchPenalty = -1;
private int[][] scoreTable;
private String seqX, seqY;
public Alignment(String x, String y) {
seqX = x;
seqY = y;
scoreTable = new int[x.length() + 1][y.length() + 1];
}
private void initializeTable() {
scoreTable[0][0] = 0;
for (int i = 1; i < scoreTable.length; i++)
scoreTable[i][0] = scoreTable[i - 1][0] + AlignType * GapPenalty;
for (int i = 1; i < scoreTable[0].length; i++)
scoreTable[0][i] = scoreTable[0][i - 1] + AlignType * GapPenalty;
}
private int getMax(int x, int y, int z, int w) {
int t = x > y ? x : y;
int s = z > w ? z : w;
return (t > s ? t : s);
}
private int getMax(int x, int y, int z) {
int t = x > y ? x : y;
return (t > z ? t : z);
}
[edit] トレースバック関数
重要なのは traceBack関数の do { ... } while 文の中身だけで、後はトレースを印刷するための準備です。
private String[] traceBack() {
int x = 0, y = 0;
if (AlignType == LOCAL) {
// Search for the maximum score brute-force (very inefficient).
// The max score can be recorded when computing scoreTable.
int max = 0;
for (int i = 0; i < scoreTable.length; i++) {
for (int j = 0; j < scoreTable[0].length; j++)
if (scoreTable[i][j] >= max) {
x = i;
y = j;
max = scoreTable[i][j];
}
}
} else if (AlignType == GLOBAL) {
x = scoreTable.length - 1;
y = scoreTable[0].length - 1;
}
StringBuffer line1 = new StringBuffer();
StringBuffer line2 = new StringBuffer();
line1.append(':');
line1.append(x);
line2.append(':');
line2.append(y);
do {
if (scoreTable[x - 1][y] + GapPenalty == scoreTable[x][y]) {
line1.insert(0, seqX.charAt(x-- - 1));
line2.insert(0, '-');
} else if (scoreTable[x][y - 1] + GapPenalty == scoreTable[x][y]) {
line1.insert(0, '-');
line2.insert(0, seqY.charAt(y-- - 1));
} else {
line1.insert(0, seqX.charAt(x-- - 1));
line2.insert(0, seqY.charAt(y-- - 1));
}
} while (x != 0 && y != 0
&& (AlignType == GLOBAL ? true : scoreTable[x][y] != 0));
line1.insert(0, ':');
line1.insert(0, String.format("%1$3d", x + 1));
line2.insert(0, ':');
line2.insert(0, String.format("%1$3d", y + 1));
return new String[] { line1.toString(), line2.toString() };
}
public void printTrace() {
String[] trace = traceBack();
String x = trace[0];
String y = trace[1];
StringBuffer sb = new StringBuffer();
sb.append(" ");
for (int i = 4; x.charAt(i) != ':'; i++)
sb.append(x.charAt(i) == y.charAt(i) ? "|" : " ");
sb.append(" ");
String z = sb.toString();
int width = 50;
int i = -1;
while (++i * width < x.length()) {
System.out.println(x.substring(i * width,
Math.min(x.length(), (i + 1) * width)));
System.out.println(z.substring(i * width,
Math.min(z.length(), (i + 1) * width)));
System.out.println(y.substring(i * width,
Math.min(y.length(), (i + 1) * width)));
}
}
[edit] アルゴリズム
getMax関数で最大値を取る漸化式を for ループで二重に回しているだけです。大域アライメントと局所アライメントの違いは、漸化式の候補に 0 を加えるだけです。
public void NeedlemanWunsch() {
AlignType = GLOBAL;
initializeTable();
for (int i = 1; i < scoreTable.length; i++)
for (int j = 1; j < scoreTable[0].length; j++)
scoreTable[i][j] = getMax(
scoreTable[i - 1][j] + GapPenalty,
scoreTable[i][j - 1] + GapPenalty,
scoreTable[i - 1][j - 1]
+ (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore
: MismatchPenalty));
}
public void SmithWaterman() {
AlignType = LOCAL;
initializeTable();
for (int i = 1; i < scoreTable.length; i++)
for (int j = 1; j < scoreTable[0].length; j++)
scoreTable[i][j] = getMax(
0,
scoreTable[i - 1][j] + GapPenalty,
scoreTable[i][j - 1] + GapPenalty,
scoreTable[i - 1][j - 1]
+ (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore
: MismatchPenalty));
}
[edit] メイン関数
public void printTable() {
if (scoreTable == null)
return;
StringBuffer sb = new StringBuffer();
for (int i = 0; i < scoreTable.length; i++) {
for (int j = 0; j < scoreTable[0].length; j++)
sb.append(String.format("%1$4d", scoreTable[i][j]));
sb.append("\n");
}
System.out.println(sb.toString());
}
public static void main(String[] args) {
Alignment A = new Alignment("aattgaagg", "gctagg");
A.NeedlemanWunsch();
//A.SmithWaterman();
A.printTable();
A.printTrace();
}
}
[edit] 実行結果
> java Alignment
0 -2 -4 -6 -8 -10 -12
-2 -1 -3 -5 -5 -7 -9
-4 -3 -2 -4 -4 -6 -8
-6 -5 -4 -1 -3 -5 -7
-8 -7 -6 -3 -2 -4 -6
-10 -7 -8 -5 -4 -1 -3
-12 -9 -8 -7 -4 -3 -2
-14 -11 -10 -9 -6 -5 -4
-16 -13 -12 -11 -8 -5 -4
-18 -15 -14 -13 -10 -7 -4
1:aattgaagg:9
| | ||
1:gct--a-gg:6