/* This is part of the NEWTRACK eyetracking software, (c) 2004 by  */
/* Eric Auer. NEWTRACK is free software; you can redistribute it   */
/* and modify it under the terms of the GNU General Public License */
/* as published by the Free Software Foundation; either version 2  */
/* of the License, or (at your option) any later version.          */
/*     NEWTRACK is distributed in the hope that it will be useful, */
/*     but WITHOUT ANY WARRANTY; without even the implied warranty */
/*     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.     */
/*     See the GNU General Public License for more details.        */
/* You should have received a copy of the GNU General Public       */
/* License (license.txt) along with this program; if not, check    */
/* www.gnu.org or write to the Free Software Foundation, Inc., 59  */
/* Temple Place, Suite 330, Boston, MA  02111-1307 USA.            */

/* Modify graphics mode to use higher refresh rates, requires VESA VBE 3 */
/* Note: This is a stripped down version of my VBE3FAST tool. The EGA,   */
/* double-scan (low resolution) and TEXT mode support is removed. For    */
/* text mode, most cards can reach 45 or 56 MHz, but not much more, even */
/* if the graphics bandwidth is much higher. Reason: Font engine limits. */

#include "vesa.h"	/* general VESA stuff and common libraries */

#include <dpmi.h>       /* to call the BIOS -- or use dos.h */

#define MIO 1000000UL    /* one million: used to express MHz clock values in Hz */

#define MAXFRAMES 140UL	/* maximum framerate in Hz (vrefresh is capped to this) */
#define GUESSXREF 120UL	/* percentage factor width -> htotal for first guess */
#define CRTCFIXUP 0	/* set to enforce CRTC hsyncpos >= hvisible: the fixup */
			/* moves hsyncpos to hvisible+1 if it is below hvisible */
#define VBE3SIMULATION 0	/* set to let negative kHz start simulation */

#define DMODE 0		/* set for debug mode (prints the timings found) */ 
#define DMODE2 0	/* set for more verbose debug mode */ 
#if DMODE2
#undef DMODE
#define DMODE 1     /* verbose debug mode implies normal debug mode */
#endif


/* Ask the video BIOS (int 0x10, AX=0x4f0b, BX=0) which pixel clock the  */
/* hardware can really produce for the requested clock in the given mode. */
/* desiredclock: wanted pixel clock, passed in ECX.                       */
/* mode: video mode number, passed in DX (clock choice can depend on it). */
/* Returns the clock reported back in ECX. If the BIOS does not answer    */
/* with AX=0x004f, returns 0 (or, with VBE3SIMULATION set, simply the     */
/* desired clock unchanged).                                              */
uint32 realclock(uint32 desiredclock, uint16 mode);

uint32 realclock(uint32 desiredclock, uint16 mode)
{
  __dpmi_regs regs;

  regs.x.ax = 0x4f0b;
  regs.x.bx = 0;
  regs.x.dx = mode;
  regs.d.ecx = desiredclock;
  __dpmi_int(0x10, &regs);

  if (regs.x.ax == 0x004f)	/* BIOS reported success */
    return regs.d.ecx;

  /* BIOS reported an error */
#if VBE3SIMULATION
  return desiredclock;
#else
  return 0;
#endif
}


#define OUT_OF_RANGE 1	/* hrefresh < 30 kHz, vrefresh < 49.5 Hz, resolution */
			/* or pixel clock out of range, or not a graphics mode */
#define PARAM_ERROR 2	/* problem with CRTC param calculations, BIOS or PLL, */
			/* or no DOS memory for the BIOS transfer buffer */
  /* Notes carried over from VBE3FAST:                                       */
  /*  (modeflags & 0x17) == 0x07: reachable text mode. "6..6" presumably     */
  /*  means (modeflags & 6) == 6, i.e. any TTYable mode - TODO confirm.      */
  /*  VBE3FAST allowed 40..1600 x 15..1200 (chars or pixels) at 30..100 kHz  */


/* Re-set the VESA mode modenumber with custom CRTC timings so that the  */
/* horizontal refresh rate gets as close as possible to (but not above)  */
/* khz kHz, while keeping the vertical refresh at or below MAXFRAMES Hz. */
/* Apart from khz and modenumber, this requires a valid vesamode         */
/* structure (width, height, modeflags and maxclock are read).           */
/*   khz:        desired horizontal refresh rate in kHz. Without         */
/*               VBE3SIMULATION, values below 30 make the function       */
/*               return 0 without touching anything. With it, 0 means    */
/*               "no change" and negative values select simulation.      */
/*   modenumber: mode number handed to the BIOS mode set / clock calls.  */
/* Returns 0 if everything worked (or nothing was requested),            */
/* OUT_OF_RANGE or PARAM_ERROR otherwise.                                */
/* Side effects: vesamode.maxclock is clamped to a plausible range, the  */
/* video mode is set again (image preserved) and the text cursor         */
/* position of page 0 is restored afterwards.                            */
int vbe3refresh(int khz, uint16 modenumber)
{
  int width, height;
#if VBE3SIMULATION
  int simulate;
#endif
  uint32 xref;
  CRTC_PARAMS cparm = {0};	/* zeroed: the whole structure is copied to the */
				/* BIOS, so fields not set below must not be    */
				/* left as stack garbage                        */
  uint32 yref1, clock1, i;
  uint32 hpulse, vpulse, hpos1, hpos2, vpos1, vpos2;
  uint16 cursorpos;
  __dpmi_regs r;

#if VBE3SIMULATION
  if (!khz)
    return 0;			/* no refresh rate adjustment requested */
  simulate = (khz < 0);		/* true if not VBE 3.0 */
  khz = (khz < 0) ? (-khz) : khz;
#else
  if (khz < 30)
    return 0;			/* just ignore simulations and nonsense */
#endif

  width = vesamode.width;
  height = vesamode.height;

  if ( (width < 640) || (height < 480) || (width > 1600) || (height > 1200) ||
      (khz < 30) || (khz > 180) || (!(vesamode.modeflags & 0x10)) )
    return OUT_OF_RANGE; /* out of range, or text mode. */
    /* actual width is width*charwidth if text mode, etc.! */
    /* charwidth 9 means more pixel clock at same char line clock */

  xref = (uint32)khz * 1000UL; /* desired hrefresh in Hz */
  /* modeflags: (F & 1) -> reachable. (F & 2) -> 2. */
  /* (F & 4) -> has BIOS-TTY. (F & 8) -> color. (F & 0x10) -> graphics. */
  /* (F & 0x20) -> VGA compatible I/O. (F & 0x40) -> VGA memory banks. */
  /* (F & 0x80) -> LFB supported. (F & 0x100) -> allows doublescan. */
  /* (F & 0x200) -> allows interlace. (F & 0x400) -> allows triplebuffer. */
  /* (F & 0x800) -> hardware stereo display possible (2 alternating pages) */
  /* (F & 0x1000) -> dual display start address available */


  clock1 = (GUESSXREF*vesamode.width/100UL) * xref; /* take a guess based on xref */
    /* i.e. assume htotal = GUESSXREF percent of width. Or better more? */

#if VBE3SIMULATION
  if ((!vesamode.maxclock) || (simulate)) /* if simulation mode */
#else
  if (vesamode.maxclock < (48 * MIO))	/* if nonsense value */
#endif
    vesamode.maxclock = 160UL * MIO;	/* invent 160 (was: 110) MHz DAC */
  if (vesamode.maxclock > (300UL*MIO))	/* too high to be useful */
      vesamode.maxclock = 300UL*MIO;	/* allows 1600x1200 156 Hz */

  if (clock1 > vesamode.maxclock)
    return OUT_OF_RANGE;

  /* No early vrefresh estimate is made here: vrefresh depends on the  */
  /* hrefresh rate that is really reached, so it is computed further   */
  /* below, once hrefresh is known. We can have much more than 5%      */
  /* blank lines, so a too high vrefresh rate can be brought down.     */

  clock1 = realclock(clock1, modenumber); /* find next possible hardware clock */
  /* should not differ much from desired clock - depends on hardware flexibility */

  if (clock1 < (10UL*MIO))	/* less than 10 MHz is too strange */
    return PARAM_ERROR;		/* could try to continue here by ignoring realclock output */

  /* CRTs are often happiest with 1-4usec. Unit: pixels. EGA used 4usec. */
  /* older CRTs want 2usec or even 3usec. TFT accept anything 1-8usec.   */
  /* *** If the pulse is too short, image will be too wide. *** */
  hpulse = clock1 / 480000UL; /* 2.1usec */
  /* EGA/VGA uses roughly 1.6usec. SVGATextMode suggests "anything > 0". */
  /* *** If this value is too small, right margin will be too small ***  */
  hpos1 = clock1 / 2500000UL; /* 0.4usec */ /* (hpulse++ -> hpos1-- ok)  */
  /* post-sync should be e.g. 1.2-3.8usec. Using 1.3usec for now...?     */
  /* EGA only has 1usec? Nice minimum: pre 0.5 pulse 1.2 post 1.3 sum 4  */
  /* *** > 1.5usec / < 1.3usec -> too much / too small left margin. ***  */
  hpos2 = clock1 / 720000UL; /* 1.4usec */
  /* is getting a sum of >= 4usec important?  */

  /* rounding: granularity is charwidth for text modes, otherwise 4 */
  hpulse = (hpulse | 3) + 1;	/* round up to next multiple of 4 */
  hpos1  = (hpos1 | 3) + 1;
  hpos2  = (hpos2 | 3) + 1;

  /* i = hrefresh in Hz reached with the minimal blanking chosen above */
  i = clock1 / (vesamode.width + hpos1 + hpulse + hpos2);
  if ((i/97UL) < (xref/100UL)) { /* considerably too LOW hrefresh clock? */
#if DMODE2
    printf("Pumping dot clock by 8 percent, hrefresh was too low.");
#endif
    clock1 = realclock((clock1/100UL) * 108UL, modenumber);
    /* find next possible hardware clock */
    /* if no useful result, could assume realclock(x,y) being x... */

    if (clock1 < (10UL*MIO))	/* less than 10 MHz is too strange */
      return PARAM_ERROR;	/* could try to continue here by ignoring realclock output */

    /* note: blanking widths are kept from the first clock guess */
    i = clock1 / (vesamode.width + hpos1 + hpulse + hpos2);
#if DMODE2
      printf(" %lu MHz now.\r\n", (unsigned long)(clock1/MIO));
#endif
  } /* pumping clock up */

#if DMODE2
  if (i > xref) /* maximum hrefresh > desired hrefresh? */
    printf("Maximum possible hrefresh clock: %lu (%u + %lu + %lu + %lu)\r\n",
      (unsigned long)i, (unsigned)vesamode.width, (unsigned long)hpos1,
      (unsigned long)hpulse, (unsigned long)hpos2);
#endif

  while (i > xref) { /* inflate blanking area to lower hrefresh rate */
    hpulse += 4; /* one "GRANULARITY UNIT" more */
    i = clock1 / (vesamode.width + hpos1 + hpulse + hpos2);
    /* more hpos1 moves image LEFT and increases crtc[4] and crtc[5] ? */
    /* more hpulse / less hpos2 (?) has the same effect ? */
    /* less hpos1 brings the image closer to the right border. */
  }; /* bring hrefresh rate down again */

#if DMODE
  printf("Found hrefresh %lu.%03lu kHz at %u + %lu + %lu + %lu.\r\n",
    (unsigned long)(i/1000UL), (unsigned long)(i%1000UL),
    (unsigned)vesamode.width, (unsigned long)hpos1,
    (unsigned long)hpulse, (unsigned long)hpos2);
#endif

  if (i < 30000UL)	/* xrefresh below 30 kHz is not acceptable */
    return OUT_OF_RANGE;

  if (vesamode.width < ((hpos1 + hpulse + hpos2)*2))
    return PARAM_ERROR;
    /* Too much of each line is blanking. Started with too high pixel clock? */

  /* now that we know that we have i lines per second, we can use that   */
  /* to create an 50-300usec (TFT: 40-500usec?) vsync pulse. Two lines   */
  /* (or 1..6) are popular. But a bit low for high frequency modes...?   */
  vpulse = (i / 15625UL) + 1; /* 64usec (min 45usec recommended) */

  /* 100usec or even less seem to be okay but 300usec are popular... */
  vpos1 = (i / 3333UL) + 1; /* 300usec */

  /* back porch of vertical blanking is long for CRTs... 600usec...  */
  /* 400 / 480 line modes use 1msec here! Higher -> 0.3-0.7msec. */
  vpos2 = (i / 3125UL) + 1; /* 320usec (conservative: 500usec) */
  /* sum of all vertical blanking should be something above 0.6msec   */
  /* *** If vpulse+vpos2 < 350usec, top margin will be too small ***  */

  /* vrefresh in units of 1/100 Hz for the minimal vertical blanking */
  yref1 = i * 100UL;
  yref1 /= (vesamode.height + vpos1 + vpulse + vpos2);

#if DMODE2
  if (yref1 > ((100UL * MAXFRAMES)+50))
    printf("Will shrink framerate from %lu to %lu Hz now...\r\n",
      (unsigned long)((yref1+50)/100UL), (unsigned long)MAXFRAMES);
#endif

  /* add blank lines after vsync until vrefresh is at most MAXFRAMES Hz */
  do {
    yref1 = i * 100UL;
    yref1 /= (vesamode.height + vpos1 + vpulse + vpos2);
    if (yref1 > (100UL * MAXFRAMES)) {
      vpos2++;
    }
  } while (yref1 > (100UL * MAXFRAMES));

#if DMODE
  printf("Found vrefresh %lu.%02lu Hz at %u + %lu + %lu + %lu.\r\n",
    (unsigned long)(yref1/100UL), (unsigned long)(yref1%100UL),
    (unsigned)vesamode.height, (unsigned long)vpos1,
    (unsigned long)vpulse, (unsigned long)vpos2);
#endif

  if (yref1 < 4950)	/* must be at least 49.50 Hz framerate */
    return OUT_OF_RANGE;

  cparm.vrefresh = yref1; /* unit 1/100 Hz */
    /* must be 100 * clock / (htotal * vtotal) */
  cparm.clock = clock1;
  cparm.crtcflags = 0; /* no double scan, no interlace, neg/neg sync  */
    /* +h -v is classic flag for 350 lines, -h +v for 400 lines etc.! */

  cparm.hsync1 = vesamode.width + hpos1;
  cparm.hsync2 = cparm.hsync1 + hpulse;
  cparm.htotal = cparm.hsync2 + hpos2;

  cparm.vsync1 = vesamode.height + vpos1;
  cparm.vsync2 = cparm.vsync1 + vpulse;
  cparm.vtotal = cparm.vsync2 + vpos2;

#if 0
xx  printf("Modeline \"%ux%u\"    %lu.%02lu  %4u %4u %4u %4u  ",
xx    vesamode.width, vesamode.height, clock1/MIO, (clock1/(MIO/100UL)) % 100UL,
xx    vesamode.width, cparm.hsync1, cparm.hsync2, cparm.htotal);
xx  printf("%4u %4u %4u %4u\r\n",
xx    vesamode.height, cparm.vsync1, cparm.vsync2, cparm.vtotal);
xx  /* for text modes (where BIOS rejects vrefresh above 7200!), use: */
xx  /* 'Modeline "widthxheightxcharwidth" ... vtotal font charwidthxcharheight' */
xx  /* VESA text modes are 108..10c */
#endif	

  /* everything went okay until now, so we try to change the mode */
  r.x.ax = 0x0300;	/* get cursor pos (and size) */
  r.x.bx = 0;		/* page BH */
  __dpmi_int(0x10, &r);
  cursorpos = r.x.dx;

  { /* need a transfer buffer */
    int segV, selV;

    segV = __dpmi_allocate_dos_memory((sizeof(cparm)+15)>>4, &selV);
      /* size unit is 16 bytes (paragraphs) */
    if (segV == -1)
      return PARAM_ERROR;	/* out of DOS memory */
    /* copy cparm to DOS memory. The copy starts at htotal, which   */
    /* assumes that htotal is the first member of CRTC_PARAMS.      */
    _movedatab(_my_ds(), (uint32)&(cparm.htotal), selV, 0,
      sizeof(cparm));

    r.x.ax = 0x4f02;
    r.x.bx = modenumber | 0x800 | 0x8000; /* custom CRTC, preserve image */
    r.x.es = segV; /* DOS segment */
    r.x.di = 0; /* offset */
#if VBE3SIMULATION
    if (simulate) {
      int n;
      printf("CRTC parameter area dump for mode %x:\n", modenumber);
      for (n=0; n < sizeof(cparm); n++) {
        printf("%2.2x ", (int)(peekb(segV,n)));
        if ((n & 15) == 15) printf("\n");
      }
      printf("\nPress any key to continue.\n");
      (void)getxkey();
      /* NOTE(review): the BIOS is not called here, so r.x.ax still   */
      /* holds 0x4f02 and the check below returns PARAM_ERROR for     */
      /* every simulation run - confirm whether that is intended.     */
    } else {
      __dpmi_int(0x10, &r);
    }
#else
    __dpmi_int(0x10, &r);
#endif
    __dpmi_free_dos_memory(selV);
  } /* transfer buffer released again */

  if (r.x.ax != 0x004f)
    return PARAM_ERROR;
    /* Mode setting failed, code r.x.ax */

  r.x.ax = 0x0200;	/* set cursor pos */
  r.x.bx = 0;		/* page BH */
  r.x.dx = cursorpos;
  __dpmi_int(0x10, &r);


#if CRTCFIXUP
#define Xinp(port) in8(port) /* add a delay here if you want */
  { /* CRTC fixup */
    int a, b;
    int irqs_were_on;
    uint16 io = 0x3d4;	/* or use: (Xinp(0x3c2) & 1) ? 0x3d4 : 0x3b4; */

    irqs_were_on = disable();	/* keep index/data port pairs together */
    out8(io, 1);	/* CRTC: get hvisible */
    a = Xinp(io+1);
    out8(io, 2);	/* CRTC: get hsyncpos */
    b = Xinp(io+1);
    if (a > b) {	/* hsyncpos must be >= hvisible */
#if DMODE
      printf("FIXUP: moving hsyncpos into range.\r\n");
#endif
      out8(io, 0x11);
      b = Xinp(io+1);
      out8(io, 0x11);
      out8(io+1, b & ~0x80);	/* registers 0..7 are un-protected now */
      out8(io, 2);
      out8(io+1, a + 1);	/* hsyncpos = hvisible+1 */
    } /* CRTC fixup triggered */
    if (irqs_were_on)
      enable();		/* do not return with interrupts left disabled */
  } /* CRTC fixup */
#endif /* CRTCFIXUP */

  return 0;	/* everything worked out okay */
} /* vbe3refresh */

