Hi,
Is this what you're looking for?
use strict;
use warnings;
use Data::Dumper;

# Group Percent_id values by the scaffold named in the Hit= field.
#
# Parameters:
#   $fh - a readable filehandle yielding the report lines.
#
# Returns a hash reference mapping each Hit value to an array reference of
# the Percent_id values recorded for it, in input order.
#
# A record may sit on one line ("... Hit=X ... Percent_id=Y") or be wrapped
# so that Percent_id=Y is on the line after the one carrying Hit=X; both
# layouts give the same result.  Lines holding neither field are ignored.
# A Percent_id with no Hit seen since the previous record is reported with
# warn() and skipped instead of being stored under a bogus key.
sub collect_percent_ids_by_hit {
    my ($fh) = @_;

    my %percent_ids_for;
    my $current_hit;    # Hit= value still waiting for its Percent_id

    LINE:
    while ( my $line = <$fh> ) {
        chomp $line;

        # List assignment in boolean context is true only when the match
        # succeeded, so a stale capture from an earlier line is never used.
        if ( my ($hit) = $line =~ m{ (?: \A | \s ) Hit= (\S+) }x ) {
            $current_hit = $hit;
        }

        my ($percent_id) = $line =~ m{ (?: \A | \s ) Percent_id= (\S+) }x;
        next LINE if !defined $percent_id;

        if ( !defined $current_hit ) {
            warn "line $.: Percent_id without a preceding Hit, skipped\n";
            next LINE;
        }

        push @{ $percent_ids_for{$current_hit} }, $percent_id;

        # One Percent_id per record: forget the Hit so a later orphan value
        # cannot be attributed to this scaffold by accident.
        undef $current_hit;
    }

    return \%percent_ids_for;
}

my %hash = %{ collect_percent_ids_by_hit( \*DATA ) };

# Sort keys so the dump is reproducible from run to run.
local $Data::Dumper::Sortkeys = 1;
print Dumper( \%hash );
Output:
$VAR1 = {
'scaffold293_size341291' => [
'228.36676217765',
'241.818181818182',
'240',
'233.076923076923',
'241.904761904762',
'227.461139896373',
'222.666666666667'
],
'scaffold4_size6989527' => [
'235.023041474654',
'247.663551401869',
'247.663551401869',
'224.137931034483',
'236.734693877551',
'237.634408602151',
'237.777777777778',
'231.707317073171',
'230.337078651685'
]
};
__DATA__
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=349
Percent_id=228.36676217765
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
Percent_id=241.818181818182
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
Percent_id=240
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=130
Percent_id=233.076923076923
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=105
Percent_id=241.904761904762
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=193
Percent_id=227.461139896373
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=150
Percent_id=222.666666666667
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=217
Percent_id=235.023041474654
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
Percent_id=247.663551401869
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
Percent_id=247.663551401869
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=174
Percent_id=224.137931034483
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=98
Percent_id=236.734693877551
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=93
Percent_id=237.634408602151
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=90
Percent_id=237.777777777778
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=82
Percent_id=231.707317073171
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=89
Percent_id=230.337078651685
On Sun, Sep 20, 2015 at 5:56 PM, Alaba, Oluwafemi (IITA) <[email protected]>
wrote:
> Dear ALL,
>
> I have a file that looks like this.
>
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=349
> Percent_id=228.36676217765
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
> Percent_id=241.818181818182
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
> Percent_id=240
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=130
> Percent_id=233.076923076923
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=105
> Percent_id=241.904761904762
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=193
> Percent_id=227.461139896373
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=150
> Percent_id=222.666666666667
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=217
> Percent_id=235.023041474654
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
> Percent_id=247.663551401869
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
> Percent_id=247.663551401869
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=174
> Percent_id=224.137931034483
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=98
> Percent_id=236.734693877551
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=93
> Percent_id=237.634408602151
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=90
> Percent_id=237.777777777778
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=82
> Percent_id=231.707317073171
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=89
> Percent_id=230.337078651685
>
> I need hints to write a script that will recognise the fragments of
> protein in the same scaffolds.
>
> Best wishes,
>
> Alaba
>