Is this an efficient version of RNA Transcription in C?

Here’s the program:

rna_transcription.c


<span style="font-weight:bold;color:#a71d5d;">#include </span><span style="color:#183691;">"rna_transcription.h"
</span><span style="font-weight:bold;color:#a71d5d;">#include </span><span style="color:#183691;"><malloc.h>
</span><span style="font-weight:bold;color:#a71d5d;">#include </span><span style="color:#183691;"><string.h>
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">static const char</span><span style="color:#323232;"> lookup[] </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#323232;">{
</span><span style="color:#323232;">    [</span><span style="color:#183691;">'A'</span><span style="color:#323232;">] </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#183691;">'U'</span><span style="color:#323232;">,
</span><span style="color:#323232;">    [</span><span style="color:#183691;">'C'</span><span style="color:#323232;">] </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#183691;">'G'</span><span style="color:#323232;">,
</span><span style="color:#323232;">    [</span><span style="color:#183691;">'G'</span><span style="color:#323232;">] </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#183691;">'C'</span><span style="color:#323232;">,
</span><span style="color:#323232;">    [</span><span style="color:#183691;">'T'</span><span style="color:#323232;">] </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#183691;">'A'
</span><span style="color:#323232;">};
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">char *</span><span style="font-weight:bold;color:#795da3;">to_rna</span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">const char *</span><span style="color:#323232;">dna)
</span><span style="color:#323232;">{
</span><span style="color:#323232;">    </span><span style="font-weight:bold;color:#a71d5d;">if </span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">!</span><span style="color:#323232;">dna)
</span><span style="color:#323232;">        </span><span style="font-weight:bold;color:#a71d5d;">return </span><span style="color:#0086b3;">NULL</span><span style="color:#323232;">;
</span><span style="color:#323232;">
</span><span style="color:#323232;">    </span><span style="font-weight:bold;color:#a71d5d;">char *</span><span style="color:#323232;">rna </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#62a35c;">calloc</span><span style="color:#323232;">(</span><span style="color:#62a35c;">strlen</span><span style="color:#323232;">(dna) </span><span style="font-weight:bold;color:#a71d5d;">+ </span><span style="color:#0086b3;">1</span><span style="color:#323232;">, </span><span style="color:#0086b3;">1</span><span style="color:#323232;">), </span><span style="font-weight:bold;color:#a71d5d;">*</span><span style="color:#323232;">start_rna </span><span style="font-weight:bold;color:#a71d5d;">=</span><span style="color:#323232;"> rna;
</span><span style="color:#323232;">    </span><span style="font-weight:bold;color:#a71d5d;">if </span><span style="color:#323232;">(rna)
</span><span style="color:#323232;">    {
</span><span style="color:#323232;">        </span><span style="font-weight:bold;color:#a71d5d;">for </span><span style="color:#323232;">(; </span><span style="font-weight:bold;color:#a71d5d;">*</span><span style="color:#323232;">dna; dna</span><span style="font-weight:bold;color:#a71d5d;">++</span><span style="color:#323232;">, rna</span><span style="font-weight:bold;color:#a71d5d;">++</span><span style="color:#323232;">)
</span><span style="color:#323232;">        {
</span><span style="color:#323232;">            </span><span style="font-weight:bold;color:#a71d5d;">if </span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">!</span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">*</span><span style="color:#323232;">rna </span><span style="font-weight:bold;color:#a71d5d;">=</span><span style="color:#323232;"> lookup[(</span><span style="font-weight:bold;color:#a71d5d;">int</span><span style="color:#323232;">)</span><span style="font-weight:bold;color:#a71d5d;">*</span><span style="color:#323232;">dna]))
</span><span style="color:#323232;">            {
</span><span style="color:#323232;">                </span><span style="color:#62a35c;">free</span><span style="color:#323232;">(rna);
</span><span style="color:#323232;">                </span><span style="font-weight:bold;color:#a71d5d;">return </span><span style="color:#0086b3;">NULL</span><span style="color:#323232;">;
</span><span style="color:#323232;">            }
</span><span style="color:#323232;">        }
</span><span style="color:#323232;">    }
</span><span style="color:#323232;">    </span><span style="font-weight:bold;color:#a71d5d;">return</span><span style="color:#323232;"> start_rna;
</span><span style="color:#323232;">}
</span>

rna_transcription.h


<span style="font-weight:bold;color:#a71d5d;">#ifndef</span><span style="color:#323232;"> RNA_TRANSCRIPTION_H
</span><span style="font-weight:bold;color:#a71d5d;">#define </span><span style="color:#323232;">RNA_TRANSCRIPTION_H
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">char *</span><span style="font-weight:bold;color:#795da3;">to_rna</span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">const char *</span><span style="color:#323232;">dna);
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">#endif
</span>

I can’t help but wonder how much of a waste of space the array would be. Surely, using a map is better, right?

GissaMittJobb, (edited )

A few things come to mind:

  1. The array is probably fine. It’s not going to be particularly large and lookups are O(1)
  2. It’s a bit weird to me that you’re using the char pointers as indices. I would probably use actual indices and spend the miniscule amount of additional stack data to improve the clarity of the code
  3. You could save on some indentation by returning early instead of nesting the for-loop inside the first if-statement
  4. Is the call to the lookup-table really safe? Maybe checking that the token from RNA is within the bounds is the way to go?
  5. The only thing I would even remotely care about with regards to performance is the malloc, and that’s not that big of a deal anyway unless the length of dna is really large. Streaming the result or overwriting the presumably already malloc’d input would be the only thing I would touch, and only if I could prove that it improves performance in practice.
  6. (added in edit): if you can guarantee that the input is well formed, you can omit the bounds check and save some effort there.
velox_vulnus,

That is not my program actually. Here’s what I’ve come up with:

rna_transcription.c


<span style="font-weight:bold;color:#a71d5d;">#include </span><span style="color:#183691;">"rna_transcription.h"
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">static char </span><span style="font-weight:bold;color:#795da3;">transcribe_nucleotide</span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">char </span><span style="color:#323232;">nucleotide) {
</span><span style="color:#323232;">    </span><span style="font-weight:bold;color:#a71d5d;">switch </span><span style="color:#323232;">(nucleotide) {
</span><span style="color:#323232;">        </span><span style="font-weight:bold;color:#a71d5d;">case </span><span style="color:#183691;">'G'</span><span style="color:#323232;">:
</span><span style="color:#323232;">            </span><span style="font-weight:bold;color:#a71d5d;">return </span><span style="color:#183691;">'C'</span><span style="color:#323232;">;
</span><span style="color:#323232;">        </span><span style="font-weight:bold;color:#a71d5d;">case </span><span style="color:#183691;">'C'</span><span style="color:#323232;">:
</span><span style="color:#323232;">            </span><span style="font-weight:bold;color:#a71d5d;">return </span><span style="color:#183691;">'G'</span><span style="color:#323232;">;
</span><span style="color:#323232;">        </span><span style="font-weight:bold;color:#a71d5d;">case </span><span style="color:#183691;">'T'</span><span style="color:#323232;">:
</span><span style="color:#323232;">            </span><span style="font-weight:bold;color:#a71d5d;">return </span><span style="color:#183691;">'A'</span><span style="color:#323232;">;
</span><span style="color:#323232;">        </span><span style="font-weight:bold;color:#a71d5d;">case </span><span style="color:#183691;">'A'</span><span style="color:#323232;">:
</span><span style="color:#323232;">            </span><span style="font-weight:bold;color:#a71d5d;">return </span><span style="color:#183691;">'U'</span><span style="color:#323232;">;
</span><span style="color:#323232;">        </span><span style="font-weight:bold;color:#a71d5d;">default</span><span style="color:#323232;">:
</span><span style="color:#323232;">            </span><span style="font-weight:bold;color:#a71d5d;">return</span><span style="color:#323232;"> nucleotide;
</span><span style="color:#323232;">    }
</span><span style="color:#323232;">}
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">char *</span><span style="font-weight:bold;color:#795da3;">to_rna</span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">const char *</span><span style="color:#323232;">dna) {
</span><span style="color:#323232;">    </span><span style="color:#0086b3;">size_t</span><span style="color:#323232;"> len </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#62a35c;">strlen</span><span style="color:#323232;">(dna);
</span><span style="color:#323232;">    </span><span style="font-weight:bold;color:#a71d5d;">char *</span><span style="color:#323232;">rna </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#62a35c;">malloc</span><span style="color:#323232;">((len </span><span style="font-weight:bold;color:#a71d5d;">+ </span><span style="color:#0086b3;">1</span><span style="color:#323232;">) </span><span style="font-weight:bold;color:#a71d5d;">* sizeof</span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">char</span><span style="color:#323232;">));
</span><span style="color:#323232;">
</span><span style="color:#323232;">    </span><span style="font-weight:bold;color:#a71d5d;">for </span><span style="color:#323232;">(</span><span style="color:#0086b3;">size_t</span><span style="color:#323232;"> i </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#0086b3;">0</span><span style="color:#323232;">; i </span><span style="font-weight:bold;color:#a71d5d;"><=</span><span style="color:#323232;"> len; </span><span style="font-weight:bold;color:#a71d5d;">++</span><span style="color:#323232;">i) {
</span><span style="color:#323232;">        rna[i] </span><span style="font-weight:bold;color:#a71d5d;">= </span><span style="color:#323232;">transcribe_nucleotide(dna[i]);
</span><span style="color:#323232;">    }
</span><span style="color:#323232;">
</span><span style="color:#323232;">    </span><span style="font-weight:bold;color:#a71d5d;">return</span><span style="color:#323232;"> rna;
</span><span style="color:#323232;">}
</span>

rna_transcription.h


<span style="font-weight:bold;color:#a71d5d;">#ifndef</span><span style="color:#323232;"> RNA_TRANSCRIPTION_H
</span><span style="font-weight:bold;color:#a71d5d;">#define </span><span style="color:#323232;">RNA_TRANSCRIPTION_H
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">#include </span><span style="color:#183691;"><string.h>
</span><span style="font-weight:bold;color:#a71d5d;">#include </span><span style="color:#183691;"><stdlib.h>
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">char *</span><span style="font-weight:bold;color:#795da3;">to_rna</span><span style="color:#323232;">(</span><span style="font-weight:bold;color:#a71d5d;">const char *</span><span style="color:#323232;">dna);
</span><span style="color:#323232;">
</span><span style="font-weight:bold;color:#a71d5d;">#endif
</span>

I could not find the equivalent of map in standard library, so that is why I was interested in the community solutions.

GissaMittJobb,

A switch statement is probably a decent option here, yeah. You trade off a little bit of memory for what might be a few more instructions executing the switch statement, unless the compiler picks up on it and optimizes it. Maybe check godbolt for what gets generated in practice if you really care about it.

  • All
  • Subscribed
  • Moderated
  • Favorites
  • programming@lemmy.ml
  • rosin
  • osvaldo12
  • Durango
  • ngwrru68w68
  • khanakhh
  • DreamBathrooms
  • mdbf
  • magazineikmin
  • Youngstown
  • everett
  • InstantRegret
  • slotface
  • thenastyranch
  • kavyap
  • JUstTest
  • GTA5RPClips
  • cisconetworking
  • cubers
  • modclub
  • tacticalgear
  • ethstaker
  • tester
  • normalnudes
  • provamag3
  • anitta
  • Leos
  • megavids
  • lostlight
  • All magazines