fastx.cr
Fastx.cr
A Crystal library for reading and writing FASTA and FASTQ files.
Installation
1. Add the dependency to your `shard.yml`:

   dependencies:
     fastx:
       github: bio-cr/fastx.cr

2. Run `shards install`.
Usage
Reading FASTA files
require "fastx"
# Using Reader directly
reader = Fastx::Fasta::Reader.new("file.fa")
reader.each do |name, sequence|
puts "Name: #{name}"
puts "Sequence: #{sequence.to_s}"
end
reader.close
# Using block (automatically closes)
Fastx::Fasta::Reader.open("file.fa") do |reader|
reader.each do |name, sequence|
puts "Name: #{name}"
puts "Sequence: #{sequence.to_s}"
end
end
# Using each_copy for String copies (avoids buffer reuse issues)
Fastx::Fasta::Reader.open("file.fa") do |reader|
reader.each_copy do |name, sequence|
puts "Name: #{name}"
puts "Sequence: #{sequence}" # sequence is already a String
end
end
Reading FASTQ files
# Using Reader directly
reader = Fastx::Fastq::Reader.new("file.fq")
reader.each do |identifier, sequence, quality|
puts "ID: #{identifier}"
puts "Sequence: #{sequence.to_s}"
puts "Quality: #{quality.to_s}"
end
reader.close
# Using block (automatically closes)
Fastx::Fastq::Reader.open("file.fq") do |reader|
reader.each do |identifier, sequence, quality|
puts "ID: #{identifier}"
puts "Sequence: #{sequence.to_s}"
puts "Quality: #{quality.to_s}"
end
end
# Using each_copy for String copies (avoids buffer reuse issues)
Fastx::Fastq::Reader.open("file.fq") do |reader|
reader.each_copy do |identifier, sequence, quality|
puts "ID: #{identifier}"
puts "Sequence: #{sequence}" # sequence is already a String
puts "Quality: #{quality}" # quality is already a String
end
end
Writing FASTA files
# Using Writer directly
writer = Fastx::Fasta::Writer.new("output.fa")
writer.write("seq1", "ACGTACGT")
writer.write("seq2", "TGCATGCA")
writer.close
# Using block (automatically closes)
Fastx::Fasta::Writer.open("output.fa") do |writer|
writer.write("seq1", "ACGTACGT")
writer.write("seq2", "TGCATGCA")
end
Writing FASTQ files
# Using Writer directly
writer = Fastx::Fastq::Writer.new("output.fq")
writer.write("seq1", "ACGTACGT", "!!!!!!!!")
writer.write("seq2", "TGCATGCA", "~~~~~~~~")
writer.close
# Using block (automatically closes)
Fastx::Fastq::Writer.open("output.fq") do |writer|
writer.write("seq1", "ACGTACGT", "!!!!!!!!")
writer.write("seq2", "TGCATGCA", "~~~~~~~~")
end
Format detection and specification
Format detection by file extension
# Format is inferred from file extension, but type casting is still required
Fastx.open("file.fa") do |reader|
reader.as(Fastx::Fasta::Reader).each do |name, sequence|
puts "#{name}: #{sequence.to_s}"
end
end
Fastx.open("file.fq") do |reader|
reader.as(Fastx::Fastq::Reader).each do |id, sequence, quality|
puts "#{id}: #{sequence.to_s}"
end
end
Explicit format specification
# Using Format enum for explicit format specification
Fastx.open("data", "r", Fastx::Format::FASTA) do |reader|
reader.as(Fastx::Fasta::Reader).each do |name, sequence|
puts "#{name}: #{sequence.to_s}"
end
end
Fastx.open("output", "w", Fastx::Format::FASTQ) do |writer|
writer.as(Fastx::Fastq::Writer).write("seq1", "ACGT", "!!!!")
end
Gzip support
Both reading and writing of gzip-compressed files are supported automatically when the filename ends with `.gz`.
# Reads gzip-compressed FASTA
Fastx::Fasta::Reader.open("file.fa.gz") do |reader|
reader.each do |name, sequence|
puts "#{name}: #{sequence.to_s}"
end
end
# Writes gzip-compressed FASTQ
Fastx::Fastq::Writer.open("output.fq.gz") do |writer|
writer.write("seq1", "ACGT", "!!!!")
end
Note: Gzip refers to standard gzip, not BGZF (bgzip)
Base encoding
Convert DNA sequences to UInt8 arrays suitable for byte-wise or array processing:
# Encode bases to UInt8 values (case-insensitive: A,C,G,T,N → 65,67,71,84,78; any other character → 78)
encoded = Fastx.encode_bases("AcGtNxyz")
# Returns: Slice[65u8, 67u8, 71u8, 84u8, 78u8, 78u8, 78u8, 78u8]
# Decode UInt8 array back to DNA string
decoded = Fastx.decode_bases(encoded)
# Returns: "ACGTNNNN"
Quality encoding
Convert quality strings to Phred score arrays and back:
# Encode quality string to Phred scores (Phred+33 by default)
phred_scores = Fastx.encode_phred("IIIIHGF") # => [40_u8, 40_u8, 40_u8, 40_u8, 39_u8, 38_u8, 37_u8]
# Decode Phred scores to quality string
quality_str = Fastx.decode_phred([40_u8, 40_u8, 40_u8, 40_u8, 39_u8, 38_u8, 37_u8]) # => "IIIIHGF"
# Specify offset for Phred+64
phred_scores64 = Fastx.encode_phred("dddd", offset: 64)
quality_str64 = Fastx.decode_phred([36_u8, 36_u8, 36_u8, 36_u8], offset: 64)
Contributing
- Fork it (https://github.com/bio-cr/fastx/fork)
- Create your feature branch (`git checkout -b my-new-feature`)
- Commit your changes (`git commit -am 'Add some feature'`)
- Push to the branch (`git push origin my-new-feature`)
- Create a new Pull Request
License
MIT License
This project includes code generated by AI.
Repository
fastx.cr
Owner
Statistic
- 2
- 0
- 0
- 1
- 0
- 3 days ago
- April 25, 2024
License
MIT License
Links
Synced at
Sat, 21 Jun 2025 23:39:22 GMT
Languages