/*
 *      Copyright (C) 1994, 1995  Claus-Justus Heine.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 /home/cvs/zftape/ftape-dynmem.c,v
 claus
 *
 1.5
 1995/11/16 22:31:04
 Exp
 *
 *   Memory allocation routines. Tries to use the remaining parts of the
 *   dma-buffer, which is aligned to 32k.
 */

/* RCS revision identifier of this source file. */
static char RCSid[] =
"ftape-dynmem.c,v 1.5 1995/11/16 22:31:04 claus Exp";

#include "ftape.h"
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <asm/segment.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/malloc.h>
#include <linux/major.h>

#include "kernel-interface.h"
#include "ftape-read.h"
#include "ftape-write.h"
#include "ftape-io.h"
#include "ftape-rw.h"
#include "qic80-compress.h"
#include "qic80-vtbl.h"
#include "ftape-dynmem.h"
#include "ftape-bsm.h"
#include "ftape-rw.h"
#include "fdc-isr.h"
#include "kernel-version.h"


/*
 *  Number of tape buffers. Initialised to NR_FTAPE_BUFFERS; when DYN_ALLOC
 *  is defined it is rewritten by ftape_change_dma_size().
 */
int ftape_num_buffers = NR_FTAPE_BUFFERS;

#ifdef DYN_ALLOC
/*
 *  local variables
 */

/*
 *  Bookkeeping for one kmalloc()ed dma area. A slot that owns no memory
 *  has dma_buffer == NULL and alloc_num == 0. The area is allocated as
 *  alloc_num * BUFF_SIZE - 24 bytes, and alloc_num - 1 tape buffers are
 *  counted as usable within it.
 */
static struct dma_mem_info
{
  byte *dma_buffer;   /* base address of kmalloced area */
  byte *tape_buffer;  /* base address of 32k aligned dma-buffer */
  int used_num;       /* how many 32k buffers within the area are used for dma */
  int alloc_num;      /* how many 32k buffers fit within area+24 bytes */
  unsigned long dma_remainder;  /* mem at bottom of dma-buffer (tape_buffer - dma_buffer) */
} dma_mem_info[ MAX_DMA_AREAS ];

/*
 *  Table of all dynamically allocated helper buffers. Each entry is
 *  first vmalloc()ed and may later be moved into the unused bottom part
 *  of a dma area (see split_dma_mem()).
 */
static struct
{ 
  byte *ptr;            /* current address of the buffer, NULL if not allocated */
  unsigned long size;   /* size of the buffer in bytes */
  void **location;      /* address of the global pointer that refers to the buffer */
  unsigned dma : 1;     /* 1 if the buffer lives inside a dma area, 0 if vmalloc()ed */
  char *name;           /* name used in trace messages */
} dyn_mem_info[] =
{
  { NULL, SCRATCH_SIZE        , (void **)&scratch_buffer,      0, "scratch_buffer" },
  { NULL, DEBLOCK_SIZE        , (void **)&deblock_buffer,      0,  "deblock_buffer"},
  { NULL, BAD_SECTOR_MAP_SIZE , (void **)&bad_sector_map,      0,  "bad_sector_map"},
  { NULL, COMPRESSION_MAP_SIZE, (void **)&ftape_compression_map,     0, "ftape_compression_map" },
  { NULL, CMPR_BUFFER_SIZE    , (void **)&ftape_compression_buffer,  0,  "ftape_compression_buffer"},
  /* size of the next entry is filled in by init_dyn_mem() */
  { NULL, 0                   , (void **)&ftape_compression_wrk_mem, 0, "ftape_compression_wrk_mem"},
  { NULL, EOF_MAP_SIZE        , (void **)&eof_map,             0, "eof_map" },
  { NULL, BUFF_BUFFER_SIZE    , (void **)&buffer,              0, "buffer" },
  { NULL, __QIC80_VTBL_SIZE   , (void **)&__qic80_vtbl,        0, "internal vtbl" }
};

/* presumably the number of entries in dyn_mem_info[] -- NR_ITEMS is defined elsewhere */
#define NR_DYN_MEM NR_ITEMS(dyn_mem_info)


#ifdef NO_TRACE_AT_ALL
#define mem_info()
#else
static void
mem_info( void )
{ 
TRACE_FUN( 8, "mem_info");
int i;
int vmalloced = 0;
int dmaed = 0;

  for (i=0;i < MAX_DMA_AREAS; i++ ) {
    dmaed += dma_mem_info[ i ].alloc_num * BUFF_SIZE - 24;
  }
  for(i=0; i < NR_DYN_MEM ;i++ ) {
    if ( !dyn_mem_info[i].dma ) vmalloced += dyn_mem_info[i].size;
  }                                      
  TRACEx1(3,"Total memory dynamically  allocated : %d bytes",
                                            dmaed + vmalloced);
  TRACEx1(3,"      memory allocated with vmalloc : %d bytes",vmalloced);
  TRACEx1(3,"      memory usable for dma         : %d bytes",dmaed);
  TRACEx1(3,"      actually used for dma transfer: %d bytes",
                              ftape_num_buffers * BUFF_SIZE );
  TRACE_EXIT;
} 
#endif

/*
 *  Move vmalloc()ed helper buffers into the unused bottom part of the
 *  dma areas. For every area the buffers are stacked downwards from the
 *  top of the remainder; a buffer is moved if it is not yet in dma memory
 *  and still fits. The contents are copied, the vmalloc()ed original is
 *  released and the global pointer is redirected.
 */
static void
split_dma_mem( void )
{
TRACE_FUN( 8, "split_dma_mem");
int area;
int item;
int room;

  for ( area = 0; area < MAX_DMA_AREAS; area++ ) {
    room = dma_mem_info[ area ].dma_remainder;
    for ( item = 0; item < NR_DYN_MEM; item++ ) {
      /*
       *  skip buffers that already live in dma memory
       */
      if ( dyn_mem_info[ item ].dma != 0 ) {
        continue;
      }
      /*
       *  skip buffers that do not fit into what is left of this area
       */
      if ( room < dyn_mem_info[ item ].size ) {
        continue;
      }
      /*
       *  carve the buffer off the top of the remaining space
       */
      room -= dyn_mem_info[ item ].size;
      dyn_mem_info[ item ].ptr = dma_mem_info[ area ].dma_buffer + room;
      memcpy( dyn_mem_info[ item ].ptr,
              *dyn_mem_info[ item ].location,
              dyn_mem_info[ item ].size );
      vfree( *dyn_mem_info[ item ].location );
      *dyn_mem_info[ item ].location = dyn_mem_info[ item ].ptr;
      dyn_mem_info[ item ].dma       = 1;

      TRACEx6(3,"Used lower dma-buffer dynamic memory allocation at (%08lx) for %s. location: %p, address of location: %p."
                " Size: %ld, remaining: %d",
              (unsigned long)dyn_mem_info[ item ].ptr,
              dyn_mem_info[ item ].name,
              dyn_mem_info[ item ].location,
              &dyn_mem_info[ item ].location,
              dyn_mem_info[ item ].size,
              room );
    }
  }
  TRACE_EXIT;
}

/*
 *  vmalloc() every buffer listed in dyn_mem_info[]; the buffers may be
 *  moved into the dma areas later.
 *
 *  The size of the compression work memory is asked from
 *  ftape_compress_info() first and stored in the matching table entry.
 *
 *  Returns 1 if everything was allocated, 0 if ftape_compress_info()
 *  reported a size of 0 or an allocation failed. Buffers allocated
 *  before a failure are left in the table.
 */
static int
init_dyn_mem( void )
{
TRACE_FUN( 8, "init_dyn_mem");
int n;
int ok;
void *mem;

  ftape_cmpr_wrk_mem_size = ftape_compress_info();
  if ( ftape_cmpr_wrk_mem_size == 0 ) {
    TRACE_EXIT;
    return 0;
  }
  /*
   *  patch the size of the compression work memory into the table
   */
  for ( n = 0; n < NR_DYN_MEM; n++ ) {
    if ( dyn_mem_info[ n ].location == (void **)&ftape_compression_wrk_mem ) {
      dyn_mem_info[ n ].size = (unsigned long)ftape_cmpr_wrk_mem_size;
      break;
    }
  }
  /*
   *  allocate one buffer after the other, stop at the first failure
   */
  ok = 1;
  for ( n = 0; ok && n < NR_DYN_MEM; n++ ) {
    mem = vmalloc( dyn_mem_info[ n ].size );
    *dyn_mem_info[ n ].location = mem;
    dyn_mem_info[ n ].ptr       = mem;
    dyn_mem_info[ n ].dma       = 0;
    ok = dyn_mem_info[ n ].ptr != NULL;
#ifndef NO_TRACE_AT_ALL
    if ( !ok ) {
      TRACEx1(-1,"Error: failed to allocate %ld bytes.",
                  dyn_mem_info[ n ].size );
    } else {
      TRACEx2(3,"allocated successfully %ld bytes at virtual address %08lx.",
                 dyn_mem_info[ n ].size, (ulong)dyn_mem_info[ n ].ptr );
    }
#endif
  }
  TRACE_EXIT;
  return ok;
}

/*
 *   copy buffers contained in dma-able memory back to vmalloced areas
 *   restore original state on error.
 *
 *   Returns 0 on success (also when no buffer lived in dma memory) and
 *   -ENOMEM if one of the replacement buffers could not be vmalloc()ed.
 *   In the latter case all buffers stay where they were.
 */
static int
undma_dyn_mem( void )
{            
TRACE_FUN( 8, "undma_dynmem");
int i,j;
int result= 0;  /* success unless an allocation fails */

  /*
   *  first pass: get a vmalloc()ed replacement for every buffer that
   *  lives in dma memory. Nothing is copied or redirected yet so that a
   *  failure can be rolled back.
   */
  for(i=0; i < NR_DYN_MEM  && result >= 0;i++ ) {
    if( dyn_mem_info[i].dma ) {
      dyn_mem_info[i].ptr = vmalloc( dyn_mem_info[i].size );
      result= dyn_mem_info[i].ptr != NULL ? 0 : -ENOMEM;
    }
  }
  if ( result >= 0 ) {
    /*
     *  second pass: copy the contents and redirect the global pointers
     */
    for(i=0; i < NR_DYN_MEM;i++ ) {
      if( dyn_mem_info[i].dma ) {
        TRACEx4(3,"Trying memcopy( %p, %p, %ld), name: %s", dyn_mem_info[i].ptr, *dyn_mem_info[i].location, dyn_mem_info[i].size, dyn_mem_info[i].name );
        memcpy( dyn_mem_info[i].ptr, *dyn_mem_info[i].location,
                                           dyn_mem_info[i].size );
        dyn_mem_info[i].dma= 0;
        TRACEx1(3, "undmaing %p", *dyn_mem_info[i].location );
        *dyn_mem_info[i].location= dyn_mem_info[i].ptr;
        TRACEx1(3, "new location %p", *dyn_mem_info[i].location );
      }
    }
  } else { /* unsuccessful, restore original state */
    /*
     *  i is one past the entry whose allocation failed. That entry has
     *  ptr == NULL, all earlier dma entries hold a fresh replacement
     *  that must be released again.
     */
    for(j=0; j < i;j++ ) {
      if( dyn_mem_info[j].dma ) {
        if ( dyn_mem_info[j].ptr != NULL ) {
          vfree( dyn_mem_info[j].ptr );
        }
        dyn_mem_info[j].ptr= *dyn_mem_info[j].location;
      }
    }
  }
  TRACE_EXIT;
  return result;
}

/*
 *   allocate enough dma-memory for num_buffers aligned 32kb buffers.
 *   That is, we need at least 64kb dma memory for a single buffer.
 *   Better is 128kb, which is enough for 3 buffers.
 *
 */
static int
get_dma_buffer ( int num_buffers )
{      
TRACE_FUN( 5, "get_dma_buffer");
int i = 0;
int buffers_allocated = 0;

  for ( i=0; i < MAX_DMA_AREAS; i++ ) {
    dma_mem_info[ i ].tape_buffer  =
    dma_mem_info[ i ].dma_buffer   = NULL;
    dma_mem_info[ i ].used_num      =
    dma_mem_info[ i ].alloc_num     =
    dma_mem_info[ i ].dma_remainder = 0;
  }        
  TRACEi(4,"buffers requested: ", num_buffers );
  i = 0;
  while( buffers_allocated < num_buffers && i < MAX_DMA_AREAS ) {
    TRACEi(4,"attempt ", i);
    /*
     *  Find the number of buffer we want to allocate.
     *  We actually allocate 128k or 64k as we miss 24 bytes for memory management
     */
    for( dma_mem_info[ i ].alloc_num = 1; 
         dma_mem_info[ i ].alloc_num <= (num_buffers - buffers_allocated); 
         dma_mem_info[ i ].alloc_num <<= 1 );
    if ( dma_mem_info[ i ].alloc_num > 4 ) {
      dma_mem_info[ i ].alloc_num = 4;
    }
    while (    dma_mem_info[ i ].dma_buffer == NULL
            && dma_mem_info[ i ].alloc_num >= 2 ) {

      TRACEx1(4,"trying to allocate %d buffers", dma_mem_info[ i ].alloc_num );
      TRACEx2(3,"alloc_num[ %d ]: %d", i, dma_mem_info[ i ].alloc_num );

      /*
       *  try to allocate the dma-buffer.
       */
      dma_mem_info[ i ].dma_buffer =
                         kmalloc( dma_mem_info[ i ].alloc_num * BUFF_SIZE - 24,
                                  GFP_KERNEL | GFP_DMA );
      if ( dma_mem_info[ i ].dma_buffer != NULL ) {

        TRACEx4(3,"dma_buffer[ %d ] %08lx, dma_buffer + BUFF_SIZE - 24: %08lx, "
                  "num_buffers: %d ",
                   i,
                   (unsigned long)dma_mem_info[ i ].dma_buffer,
                   ((unsigned long )dma_mem_info[ i ].dma_buffer) + BUFF_SIZE - 24,
                   buffers_allocated + dma_mem_info[ i ].alloc_num - 1 );
        /*
         *  align dma-buffer
         */
        dma_mem_info[ i ].tape_buffer= 
                   (char *)((   (unsigned long)dma_mem_info[ i ].dma_buffer
                              + BUFF_SIZE-1)&~(BUFF_SIZE-1));
      } else {              
        /*
         *  setup for retry with less buffers
         */
        dma_mem_info[ i ].alloc_num >>= 1;
      }  
    }
    if ( dma_mem_info[ i ].dma_buffer == NULL ) {
      dma_mem_info[ i ].alloc_num = 0;
    } else {
      buffers_allocated          += dma_mem_info[ i ].alloc_num - 1;
      dma_mem_info[ i ].used_num  = dma_mem_info[ i ].alloc_num - 1;
      if ( buffers_allocated > num_buffers ) {
        dma_mem_info[ i ].used_num += num_buffers - buffers_allocated;
      }
    }
    i ++;
  }
  TRACE_EXIT;
  return buffers_allocated <= num_buffers ? buffers_allocated : num_buffers;
}


/*
 *  determine size of unused dma memory
 */
static void
init_dma_info ( int num_buffers)
{      
TRACE_FUN( 8, "init_dma_info");
int i;

  for ( i=0; i < MAX_DMA_AREAS; i++ ) {
    dma_mem_info[ i ].dma_remainder =   
                                  (unsigned long)dma_mem_info[ i ].tape_buffer
                                - (unsigned long)dma_mem_info[ i ].dma_buffer;
    TRACEx2(3,"dma_remainder[ %d ]: %ld", i, dma_mem_info[ i ].dma_remainder );
  }
  TRACE_EXIT;
}

/*
 *  Release all memory managed by this file: the kmalloc()ed dma areas
 *  and every helper buffer that is still vmalloc()ed. Helper buffers
 *  that live inside a dma area vanish together with that area.
 *
 *  All bookkeeping is reset afterwards, so the function may be called
 *  more than once and no stale pointer is left behind in dma_mem_info[],
 *  dyn_mem_info[] or the global buffer pointers.
 */
void
ftape_cleanup_mem( void )
{          
TRACE_FUN( 5, "ftape_cleanup_mem");
int i;

  for ( i= 0; i < MAX_DMA_AREAS; i++ ) {
    if ( dma_mem_info[ i ].dma_buffer != NULL ) {
      kfree( dma_mem_info[ i ].dma_buffer );
    }
    /*
     *  forget the area, the pointers are dangling now
     */
    dma_mem_info[ i ].tape_buffer   =
    dma_mem_info[ i ].dma_buffer    = NULL;
    dma_mem_info[ i ].used_num      =
    dma_mem_info[ i ].alloc_num     = 0;
    dma_mem_info[ i ].dma_remainder = 0;
  }
  for(i=0; i < NR_DYN_MEM ;i++ ) {
    if ( !dyn_mem_info[i].dma &&  dyn_mem_info[i].ptr != NULL ) {
      vfree( dyn_mem_info[i].ptr );
    }
    /*
     *  clear ptr in any case: for a buffer that lived in a dma area it
     *  points into memory that has just been kfree()d.
     */
    dyn_mem_info[i].ptr= NULL;
    dyn_mem_info[i].dma= 0;
    *dyn_mem_info[i].location= NULL;
  }
  TRACE_EXIT;
}

/*
 *  called by MTSETDRVBUFFER
 */
int
ftape_change_dma_size( int num_buffers )
{    
TRACE_FUN( 5, "ftape_change_dma_size");
struct dma_mem_info old_dma_info[ MAX_DMA_AREAS ];
int result;
int new_num_buffers;
int max_num_buffers;
int i;
int min, max, ready;
int swap_alloc;
byte *swap_buffer;

  result = undma_dyn_mem();
  if( result >= 0 ) {
    /*
     *  we try to allocate in any case the new amount of buffers.
     *  This is likely to fail if the physical memory is 
     *  heavily fragmented (which is most often the case on my machine)
     *  If the allocation failes, we use the old dma-area. This has 
     *  the disadvantage that we can't decrease the amount of memory
     *  that is actually occupied by the driver if the physical memory 
     *  is quite fragmented. 
     *  I don't want to implement more sophisticated strategies now. 
     *  I don't think that this feature is really useful. The driver
     *  won't work properly with less than 2 buffers.
     */
    memcpy( old_dma_info, dma_mem_info, sizeof(dma_mem_info) );
    new_num_buffers= get_dma_buffer ( num_buffers );
    if ( new_num_buffers < num_buffers ) {                                                                                      
      /*
       *   calculate the maximum of buffers that are available
       *   with the old buffer
       */
      for ( max_num_buffers = 0, i = 0; i < MAX_DMA_AREAS; i ++ ) {
        if ( old_dma_info[ i ].alloc_num >= 2 ) {

          TRACEx2(4, "old_dma_info[ %d ].alloc_num = %d", 
                      i, old_dma_info[ i ].alloc_num  );

          max_num_buffers += old_dma_info[ i ].alloc_num - 1;
        }
      }
      TRACEi(4,"maximum number of buffers with old memory: ", max_num_buffers );

      /*
       *   maybe we don't need more memory.
       */

      if ( num_buffers > max_num_buffers ) {
        TRACEx2(1,"Couldn't allocate new buffers (%d), merging old ones (%d) with new ones ...", num_buffers, ftape_num_buffers );
        /*
         *  now we try to put as much dma as possible into the dma_mem_info struct
         */
        ready = 0;
        while ( !ready ) {
          /*
           *   find maximum block in new dma struct
           */                                      
          for( max = 0, i = 1;i < MAX_DMA_AREAS; i++ ){
            if ( dma_mem_info[ i ].alloc_num > dma_mem_info[ max ].alloc_num ) {
              max = i;
            }
          }   
          /*
           *  find minimum in old dma-struct
           */
          for( min = 0, i = 1;i < MAX_DMA_AREAS; i++ ){
            if ( old_dma_info[ i ].alloc_num < old_dma_info[ min ].alloc_num ) {
              min = i;
            }
          }
          /*
           *  if maximum block in new struct is bigger than minimum in old struct,
           *  replace the old minimum by the new maximum. Else terminate.
           */
          if ( dma_mem_info[ max ].alloc_num > old_dma_info[ min ].alloc_num ) {

            TRACEx6(3,"exchanging %d buffers at %p (%d) with %d buffers at %p (%d)",
                       old_dma_info[ min ].alloc_num,
                       old_dma_info[ min ].dma_buffer,               
                       min,
                       dma_mem_info[ max ].alloc_num,
                       dma_mem_info[ max ].dma_buffer,
                       max );

            swap_buffer = old_dma_info[ min ].dma_buffer;
            swap_alloc  = old_dma_info[ min ].alloc_num;
            old_dma_info[ min ].dma_buffer = dma_mem_info[ max ].dma_buffer;
            old_dma_info[ min ].alloc_num  =  dma_mem_info[ max ].alloc_num;
            dma_mem_info[ max ].dma_buffer = swap_buffer;
            dma_mem_info[ max ].alloc_num  = swap_alloc;
          } else {
            ready = 1;
          }
        }
      }
      /*
       *  free the new buffers that have already been allocated.
       */
      for ( i = 0; i < MAX_DMA_AREAS; i++ ) {
        if ( dma_mem_info[ i ].dma_buffer ) {
          kfree( dma_mem_info[ i ].dma_buffer );
        }
      }
      /*
       *   restore old buffers
       */
      memcpy( dma_mem_info, old_dma_info, sizeof(dma_mem_info) );
      /*
       *   calculate the maximum of buffers that are available
       */
      for ( max_num_buffers = 0, i = 0; i < MAX_DMA_AREAS; i ++ ) {
        if ( dma_mem_info[ i ].alloc_num >= 2 ) {

          TRACEx2(4, "dma_mem_info[ %d ].alloc_num = %d", 
                      i, dma_mem_info[ i ].alloc_num  );

          max_num_buffers += dma_mem_info[ i ].alloc_num - 1;
        }
      }
      TRACEi(4,"maximum number of buffers: ", max_num_buffers );
      /*
       *  maybe everything is fine.
       */
      if ( num_buffers <= max_num_buffers ) {
        ftape_num_buffers= num_buffers;
      } else {
        TRACE(1,"Couldn't allocate new dma-buffer.");
        result= -ENOMEM;
        num_buffers       =
        ftape_num_buffers = max_num_buffers;
      }               
      /*
       *  now determine the start of the memory areas used for dma transfer
       *  the unused areas that can be occupied by ordinary buffer.
       */
      for ( i = 0; i < MAX_DMA_AREAS; i ++ ) {
        if ( num_buffers > 0 ) {
          dma_mem_info[ i ].used_num = dma_mem_info[ i ].alloc_num - 1;
          num_buffers -= dma_mem_info[ i ].used_num;
          if ( num_buffers < 0 ) {
            dma_mem_info[ i ].used_num += num_buffers;
          }
          dma_mem_info[ i ].tape_buffer =    dma_mem_info[ i ].dma_buffer - 24
                                          + (   dma_mem_info[ i ].alloc_num
                                              - dma_mem_info[ i ].used_num
                                            ) * BUFF_SIZE;
        } else {
          /*
           *  mark buffer as unused, maybe free allocated buffer
           */
          dma_mem_info[ i ].dma_remainder =
          dma_mem_info[ i ].used_num       =
          dma_mem_info[ i ].alloc_num      = 0;
          if ( dma_mem_info[ i ].dma_buffer ) {
            TRACEi(3,"freeing superfluous buffer ", i);
            kfree( dma_mem_info[ i ].dma_buffer );
          }
          dma_mem_info[ i ].tape_buffer =
          dma_mem_info[ i ].dma_buffer  = NULL;
        }
        TRACEx2(4,"use %d buffers of dma-buffer area %d", dma_mem_info[ i ].used_num, i );
      }
    } else {
      /*
       *   everything is fine, free old buffers.
       */
      for ( i = 0; i < MAX_DMA_AREAS; i++ ) {
        if ( old_dma_info[ i ].dma_buffer ) {
          kfree( old_dma_info[ i ].dma_buffer );
        }
      }
      ftape_num_buffers = num_buffers;
    } 
    /* 
     *  determine free memory areas
     */
    init_dma_info ( ftape_num_buffers);
    /* 
     *  move vmalloced memory to unused dma-able memory
     */
    split_dma_mem( );                                  
    /*
     *  say something if not NO_TRACE_AT_ALL
     */
    mem_info();
  } else {
    TRACE(-1,"Error: Couldn't reallocate buffers");
  }
  TRACE_EXIT;
  return result;
} 

/*
 *   called by init_module() to install the initial tape-buffer.
 *
 *   Requests dma memory for NR_FTAPE_BUFFERS buffers and stores the
 *   number actually obtained in *num_buffers. Then the helper buffers
 *   are allocated and, as far as they fit, moved into the unused parts
 *   of the dma areas.
 *
 *   Returns 0 on success. If no dma buffer or not all helper buffers
 *   could be allocated everything is released again and -ENOMEM is
 *   returned.
 */
int
ftape_get_tape_buffer( int *num_buffers )
{
TRACE_FUN( 5, "ftape_get_tape_buffer");
int status = -ENOMEM;

  *num_buffers = get_dma_buffer ( NR_FTAPE_BUFFERS );
  if ( *num_buffers != 0 && init_dyn_mem() ) {
    init_dma_info ( *num_buffers );
    split_dma_mem();
    mem_info();
    status = 0;
  } else {
    /*
     *  give back whatever has been allocated so far
     */
    ftape_cleanup_mem();
  }
  TRACE_EXIT;
  return status;
}

/*
 *   called by ftape_zap_read_buffers() to initialize the buffer 
 *   struct with the proper buffer addresses.
 */
void ftape_init_tape_buffer ( void )
{
TRACE_FUN( 8, "ftape_init_tape_buffer");
int i, j, k;

  k = 0;
  for ( j = 0; j < MAX_DMA_AREAS; j ++ ) {
    for ( i=0; i < dma_mem_info[ j ].used_num; i++  ) {
      buffer[ k ].address = dma_mem_info[ j ].tape_buffer + i * BUFF_SIZE;
      TRACEx1(8,"tape buffer at %p", buffer[ k ].address );
      k ++;
    }
  }
  TRACE_EXIT;
}

#endif


/*{{{}}}*/
