llio.texi 200 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989
  1. @node Low-Level I/O, File System Interface, I/O on Streams, Top
  2. @c %MENU% Low-level, less portable I/O
  3. @chapter Low-Level Input/Output
  4. This chapter describes functions for performing low-level input/output
  5. operations on file descriptors. These functions include the primitives
  6. for the higher-level I/O functions described in @ref{I/O on Streams}, as
  7. well as functions for performing low-level control operations for which
  8. there are no equivalents on streams.
  9. Stream-level I/O is more flexible and usually more convenient;
  10. therefore, programmers generally use the descriptor-level functions only
  11. when necessary. These are some of the usual reasons:
  12. @itemize @bullet
  13. @item
  14. For reading binary files in large chunks.
  15. @item
  16. For reading an entire file into core before parsing it.
  17. @item
  18. To perform operations other than data transfer, which can only be done
  19. with a descriptor. (You can use @code{fileno} to get the descriptor
  20. corresponding to a stream.)
  21. @item
  22. To pass descriptors to a child process. (The child can create its own
  23. stream to use a descriptor that it inherits, but cannot inherit a stream
  24. directly.)
  25. @end itemize
  26. @menu
  27. * Opening and Closing Files:: How to open and close file
  28. descriptors.
  29. * I/O Primitives:: Reading and writing data.
  30. * File Position Primitive:: Setting a descriptor's file
  31. position.
  32. * Descriptors and Streams:: Converting descriptor to stream
  33. or vice-versa.
  34. * Stream/Descriptor Precautions:: Precautions needed if you use both
  35. descriptors and streams.
  36. * Scatter-Gather:: Fast I/O to discontinuous buffers.
  37. * Copying File Data:: Copying data between files.
  38. * Memory-mapped I/O:: Using files like memory.
  39. * Waiting for I/O:: How to check for input or output
  40. on multiple file descriptors.
  41. * Synchronizing I/O:: Making sure all I/O actions completed.
  42. * Asynchronous I/O:: Perform I/O in parallel.
  43. * Control Operations:: Various other operations on file
  44. descriptors.
  45. * Duplicating Descriptors:: Fcntl commands for duplicating
  46. file descriptors.
  47. * Descriptor Flags:: Fcntl commands for manipulating
  48. flags associated with file
  49. descriptors.
  50. * File Status Flags:: Fcntl commands for manipulating
  51. flags associated with open files.
  52. * File Locks:: Fcntl commands for implementing
  53. file locking.
  54. * Open File Description Locks:: Fcntl commands for implementing
  55. open file description locking.
  56. * Open File Description Locks Example:: An example of open file description lock
  57. usage.
  58. * Interrupt Input:: Getting an asynchronous signal when
  59. input arrives.
  60. * IOCTLs:: Generic I/O Control operations.
  61. * Other Low-Level I/O APIs:: Other low-level-I/O-related functions.
  62. @end menu
  63. @node Opening and Closing Files
  64. @section Opening and Closing Files
  65. @cindex opening a file descriptor
  66. @cindex closing a file descriptor
  67. This section describes the primitives for opening and closing files
  68. using file descriptors. The @code{open} and @code{creat} functions are
  69. declared in the header file @file{fcntl.h}, while @code{close} is
  70. declared in @file{unistd.h}.
  71. @pindex unistd.h
  72. @pindex fcntl.h
  73. @deftypefun int open (const char *@var{filename}, int @var{flags}[, mode_t @var{mode}])
  74. @standards{POSIX.1, fcntl.h}
  75. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  76. The @code{open} function creates and returns a new file descriptor for
  77. the file named by @var{filename}. Initially, the file position
  78. indicator for the file is at the beginning of the file. The argument
  79. @var{mode} (@pxref{Permission Bits}) is used only when a file is
  80. created, but it doesn't hurt to supply the argument in any case.
  81. The @var{flags} argument controls how the file is to be opened. This is
  82. a bit mask; you create the value by the bitwise OR of the appropriate
  83. parameters (using the @samp{|} operator in C).
  84. @xref{File Status Flags}, for the parameters available.
  85. The normal return value from @code{open} is a non-negative integer file
  86. descriptor. In the case of an error, a value of @math{-1} is returned
  87. instead. In addition to the usual file name errors (@pxref{File
  88. Name Errors}), the following @code{errno} error conditions are defined
  89. for this function:
  90. @table @code
  91. @item EACCES
  92. The file exists but is not readable/writable as requested by the @var{flags}
  93. argument, or the file does not exist and the directory is unwritable so
  94. it cannot be created.
  95. @item EEXIST
  96. Both @code{O_CREAT} and @code{O_EXCL} are set, and the named file already
  97. exists.
  98. @item EINTR
  99. The @code{open} operation was interrupted by a signal.
  100. @xref{Interrupted Primitives}.
  101. @item EISDIR
  102. The @var{flags} argument specified write access, and the file is a directory.
  103. @item EMFILE
  104. The process has too many files open.
  105. The maximum number of file descriptors is controlled by the
  106. @code{RLIMIT_NOFILE} resource limit; @pxref{Limits on Resources}.
  107. @item ENFILE
  108. The entire system, or perhaps the file system which contains the
  109. directory, cannot support any additional open files at the moment.
  110. (This problem cannot happen on @gnuhurdsystems{}.)
  111. @item ENOENT
  112. The named file does not exist, and @code{O_CREAT} is not specified.
  113. @item ENOSPC
  114. The directory or file system that would contain the new file cannot be
  115. extended, because there is no disk space left.
  116. @item ENXIO
  117. @code{O_NONBLOCK} and @code{O_WRONLY} are both set in the @var{flags}
  118. argument, the file named by @var{filename} is a FIFO (@pxref{Pipes and
  119. FIFOs}), and no process has the file open for reading.
  120. @item EROFS
  121. The file resides on a read-only file system and any of @w{@code{O_WRONLY}},
  122. @code{O_RDWR}, and @code{O_TRUNC} are set in the @var{flags} argument,
  123. or @code{O_CREAT} is set and the file does not already exist.
  124. @end table
  125. @c !!! umask
  126. If on a 32 bit machine the sources are translated with
  127. @code{_FILE_OFFSET_BITS == 64} the function @code{open} returns a file
  128. descriptor opened in the large file mode which enables the file handling
  129. functions to use files up to @twoexp{63} bytes in size and offset from
  130. @minus{}@twoexp{63} to @twoexp{63}. This happens transparently for the user
  131. since all of the low-level file handling functions are equally replaced.
  132. This function is a cancellation point in multi-threaded programs. This
  133. is a problem if the thread allocates some resources (like memory, file
  134. descriptors, semaphores or whatever) at the time @code{open} is
  135. called. If the thread gets canceled, these resources stay allocated
  136. until the program ends. To avoid this, calls to @code{open} should be
  137. protected using cancellation handlers.
  138. @c ref pthread_cleanup_push / pthread_cleanup_pop
  139. The @code{open} function is the underlying primitive for the @code{fopen}
  140. and @code{freopen} functions, which create streams.
  141. @end deftypefun
  142. @deftypefun int open64 (const char *@var{filename}, int @var{flags}[, mode_t @var{mode}])
  143. @standards{Unix98, fcntl.h}
  144. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  145. This function is similar to @code{open}. It returns a file descriptor
  146. which can be used to access the file named by @var{filename}. The only
  147. difference is that on 32 bit systems the file is opened in the
  148. large file mode. I.e., file length and file offsets can exceed 31 bits.
  149. When the sources are translated with @code{_FILE_OFFSET_BITS == 64} this
  150. function is actually available under the name @code{open}. I.e., the
  151. new, extended API using 64 bit file sizes and offsets transparently
  152. replaces the old API.
  153. @end deftypefun
  154. @deftypefun int openat (int @var{filedes}, const char *@var{filename}, int @var{flags}[, mode_t @var{mode}])
  155. @standards{POSIX.1, fcntl.h}
  156. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  157. This function is the descriptor-relative variant of the @code{open}
  158. function. @xref{Descriptor-Relative Access}.
  159. Note that the @var{flags} argument of @code{openat} does not accept
  160. @code{AT_@dots{}} flags, only the flags described for the @code{open}
  161. function above.
  162. The @code{openat} function can fail for additional reasons:
  163. @table @code
  164. @item EBADF
  165. The @var{filedes} argument is not a valid file descriptor.
  166. @item ENOTDIR
  167. The descriptor @var{filedes} is not associated with a directory, and
  168. @var{filename} is a relative file name.
  169. @end table
  170. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  171. function is in fact @code{openat64} since the LFS interface transparently
  172. replaces the normal implementation.
  173. @end deftypefun
  174. @deftypefun int openat64 (int @var{filedes}, const char *@var{filename}, int @var{flags}[, mode_t @var{mode}])
  175. @standards{GNU, fcntl.h}
  176. This function is the large-file variant of @code{openat}, similar to how
  177. @code{open64} is the large-file variant of @code{open}.
  178. When the sources are translated with @code{_FILE_OFFSET_BITS == 64} this
  179. function is actually available under the name @code{openat}. I.e., the
  180. new, extended API using 64 bit file sizes and offsets transparently
  181. replaces the old API.
  182. @end deftypefun
  183. @deftp {Data Type} {struct open_how}
  184. @standards{Linux, fcntl.h}
  185. The @code{open_how} structure describes how to open a file using @code{openat2}.
  186. @strong{Portability note:} In the future, additional fields can be added
  187. to @code{struct open_how}, so that the size of this data type increases.
  188. Do not use it in places where this matters, such as structure fields in
  189. installed header files, where such a change could affect the application
  190. binary interface (ABI).
  191. The following generic fields are available.
  192. @table @code
  193. @item flags
  194. This field specifies the file creation and file status flags to use when
  195. opening the file.
  196. All of the @code{O_*} flags defined for @code{openat} are valid.
  197. The @code{openat2} function is stricter than @code{openat} in rejecting unknown or
  198. conflicting values. For example, @code{openat2} rejects a mode that
  199. exceeds 07777, whereas @code{openat} silently ignores
  200. excess high-order bits.
  201. @item mode
  202. This field specifies the mode for the new file, similar to the @var{mode}
  203. argument of @code{openat}. It should be in the range 0..07777.
  204. @item resolve
  205. This is a bitmask of flags that affect the resolution of file name components.
  206. Unlike @code{O_NOFOLLOW}, it affects all file name components, not just the
  207. last one. The following flags are available.
  208. @table @code
  209. @item RESOLVE_NO_XDEV
  210. Disallow traversal of mount points during path resolution (including all
  211. bind mounts).
  212. @item RESOLVE_NO_MAGICLINKS
  213. Disallow all @strong{magic-link} resolution during path resolution. Magic
  214. links are symbolic link-like objects that are found in @strong{procfs};
  215. for example, @file{/proc/@var{pid}/exe}.
  216. @item RESOLVE_NO_SYMLINKS
  217. Disallow resolution of symbolic links during path resolution.
  218. This option implies @code{RESOLVE_NO_MAGICLINKS}.
  219. @item RESOLVE_BENEATH
  220. This rejects absolute pathnames, pathnames containing @file{..}@:
  221. components that would be resolved relative to the @var{dirfd} argument of @code{openat2},
  222. and symbolic links that resolve to pathnames that would be rejected.
  223. The rejection occurs even if @var{dirfd} is the root directory.
  224. @item RESOLVE_IN_ROOT
  225. Treat absolute pathnames as being relative to the @var{dirfd} argument of @code{openat2},
  226. treat a @file{..}@: component as being equivalent to @file{.}@:
  227. if it is resolved relative to @var{dirfd},
  228. and treat symbolic link contents consistently with this.
  229. @end table
  230. @end table
  231. @end deftp
  232. @deftypefun int openat2 (int @var{dirfd}, const char *@var{pathname}, const struct open_how *@var{how}, size_t @var{size})
  233. @standards{Linux, fcntl.h}
  234. @safety{@mtsafe{}@assafe{}@acsafe{}}
  235. This function is an extension of @code{openat} and provides a superset of its
  236. functionality. @xref{Descriptor-Relative Access}.
  237. The @var{size} argument must equal @code{sizeof *@var{how}}. For portability
  238. to future API versions that may extend @code{struct open_how}, @code{*@var{how}}
  239. should be fully initialized, e.g., by a struct initializer, by @code{memset} to zero,
  240. or by having static storage duration.
  241. On failure, @code{openat2} returns @math{-1} and sets @code{errno}. It can
  242. fail for any of the reasons @code{openat} fails, plus the following reasons:
  243. @table @code
  244. @item E2BIG
  245. @var{size} is too large for any future extension, or @code{*@var{how}} contains
  246. non-zero members that are future extensions not supported by this kernel.
  247. @item EINVAL
  248. An unknown flag or invalid value was used in @code{*@var{how}}; or
  249. @code{@var{how}->mode} is non-zero, but @code{@var{how}->flags} does not contain
  250. @code{O_CREAT} or @code{O_TMPFILE}; or @var{size} is smaller than any size supported
  251. by the kernel.
  252. @end table
  253. It can also return all the errors @code{openat} returns.
  254. Similar to @code{openat}, @code{openat2} is a cancellation point.
  255. Unlike other @code{open}-like functions, this function ignores
  256. @code{_FILE_OFFSET_BITS} and always operates in large file mode.
  257. @end deftypefun
  258. @deftypefn {Obsolete function} int creat (const char *@var{filename}, mode_t @var{mode})
  259. @standards{POSIX.1, fcntl.h}
  260. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  261. This function is obsolete. The call:
  262. @smallexample
  263. creat (@var{filename}, @var{mode})
  264. @end smallexample
  265. @noindent
  266. is equivalent to:
  267. @smallexample
  268. open (@var{filename}, O_WRONLY | O_CREAT | O_TRUNC, @var{mode})
  269. @end smallexample
  270. If on a 32 bit machine the sources are translated with
  271. @code{_FILE_OFFSET_BITS == 64} the function @code{creat} returns a file
  272. descriptor opened in the large file mode which enables the file handling
  273. functions to use files up to @twoexp{63} in size and offset from
  274. @minus{}@twoexp{63} to @twoexp{63}. This happens transparently for the user
  275. since all of the low-level file handling functions are equally replaced.
  276. @end deftypefn
  277. @deftypefn {Obsolete function} int creat64 (const char *@var{filename}, mode_t @var{mode})
  278. @standards{Unix98, fcntl.h}
  279. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  280. This function is similar to @code{creat}. It returns a file descriptor
  281. which can be used to access the file named by @var{filename}. The only
  282. difference is that on 32 bit systems the file is opened in the
  283. large file mode. I.e., file length and file offsets can exceed 31 bits.
  284. To use this file descriptor one must not use the normal operations but
  285. instead the counterparts named @code{*64}, e.g., @code{lseek64}.
  286. When the sources are translated with @code{_FILE_OFFSET_BITS == 64} this
  287. function is actually available under the name @code{creat}. I.e., the
  288. new, extended API using 64 bit file sizes and offsets transparently
  289. replaces the old API.
  290. @end deftypefn
  291. @deftypefun int close (int @var{filedes})
  292. @standards{POSIX.1, unistd.h}
  293. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  294. The function @code{close} closes the file descriptor @var{filedes}.
  295. Closing a file has the following consequences:
  296. @itemize @bullet
  297. @item
  298. The file descriptor is deallocated.
  299. @item
  300. Any record locks owned by the process on the file are unlocked.
  301. @item
  302. When all file descriptors associated with a pipe or FIFO have been closed,
  303. any unread data is discarded.
  304. @end itemize
  305. This function is a cancellation point in multi-threaded programs. This
  306. is a problem if the thread allocates some resources (like memory, file
  307. descriptors, semaphores or whatever) at the time @code{close} is
  308. called. If the thread gets canceled these resources stay allocated
  309. until the program ends. To avoid this, calls to @code{close} should be
  310. protected using cancellation handlers.
  311. @c ref pthread_cleanup_push / pthread_cleanup_pop
  312. The normal return value from @code{close} is @math{0}; a value of @math{-1}
  313. is returned in case of failure. The following @code{errno} error
  314. conditions are defined for this function:
  315. @table @code
  316. @item EBADF
  317. The @var{filedes} argument is not a valid file descriptor.
  318. @item EINTR
  319. The @code{close} call was interrupted by a signal.
  320. @xref{Interrupted Primitives}.
  321. Here is an example of how to handle @code{EINTR} properly:
  322. @smallexample
  323. TEMP_FAILURE_RETRY (close (desc));
  324. @end smallexample
  325. @item ENOSPC
  326. @itemx EIO
  327. @itemx EDQUOT
  328. When the file is accessed by NFS, these errors from @code{write} can sometimes
  329. not be detected until @code{close}. @xref{I/O Primitives}, for details
  330. on their meaning.
  331. @end table
  332. Please note that there is @emph{no} separate @code{close64} function.
  333. This is not necessary since this function does not determine nor depend
  334. on the mode of the file. The kernel which performs the @code{close}
  335. operation knows which mode the descriptor is used for and can handle
  336. this situation.
  337. @end deftypefun
  338. To close a stream, call @code{fclose} (@pxref{Closing Streams}) instead
  339. of trying to close its underlying file descriptor with @code{close}.
  340. This flushes any buffered output and updates the stream object to
  341. indicate that it is closed.
  342. @deftypefun int close_range (unsigned int @var{lowfd}, unsigned int @var{maxfd}, int @var{flags})
  343. @standards{Linux, unistd.h}
  344. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  345. @c This is a syscall for Linux v5.9. There is no fallback emulation for
  346. @c older kernels.
  347. The function @code{close_range} closes the file descriptors from @var{lowfd}
  348. to @var{maxfd} (inclusive). This function is similar to calling @code{close} on each
  349. file descriptor in the specified range, depending on the @var{flags}.
  350. This function is only supported on recent Linux versions and @theglibc{}
  351. does not provide any fallback (the application will need to handle possible
  352. @code{ENOSYS}).
  353. The @var{flags} add options on how the files are closed. Linux currently
  354. supports:
  355. @vtable @code
  356. @item CLOSE_RANGE_UNSHARE
  357. Unshare the file descriptor table before closing file descriptors.
  358. @item CLOSE_RANGE_CLOEXEC
  359. Set the @code{FD_CLOEXEC} bit instead of closing the file descriptor.
  360. @end vtable
  361. The normal return value from @code{close_range} is @math{0}; a value
  362. of @math{-1} is returned in case of failure. The following @code{errno} error
  363. conditions are defined for this function:
  364. @table @code
  365. @item EINVAL
  366. The @var{lowfd} value is larger than @var{maxfd} or an unsupported flag
  367. is used in @var{flags}.
  368. @item ENOMEM
  369. Either there is not enough memory for the operation, or the process is
  370. out of address space. It can only happen when the @code{CLOSE_RANGE_UNSHARE}
  371. flag is used.
  372. @item EMFILE
  373. The process has too many files open. This can only happen when the
  374. @code{CLOSE_RANGE_UNSHARE} flag is used.
  375. The maximum number of file descriptors is controlled by the
  376. @code{RLIMIT_NOFILE} resource limit; @pxref{Limits on Resources}.
  377. @item ENOSYS
  378. The kernel does not implement the required functionality.
  379. @end table
  380. @end deftypefun
  381. @deftypefun void closefrom (int @var{lowfd})
  382. @standards{GNU, unistd.h}
  383. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  384. The function @code{closefrom} closes all file descriptors greater than or equal
  385. to @var{lowfd}. This function is similar to calling
  386. @code{close} for all open file descriptors not less than @var{lowfd}.
  387. Already closed file descriptors are ignored.
  388. @end deftypefun
  389. @node I/O Primitives
  390. @section Input and Output Primitives
  391. This section describes the functions for performing primitive input and
  392. output operations on file descriptors: @code{read}, @code{write}, and
  393. @code{lseek}. These functions are declared in the header file
  394. @file{unistd.h}.
  395. @pindex unistd.h
  396. @deftp {Data Type} ssize_t
  397. @standards{POSIX.1, unistd.h}
  398. This data type is used to represent the sizes of blocks that can be
  399. read or written in a single operation. It is similar to @code{size_t},
  400. but must be a signed type.
  401. @end deftp
  402. @cindex reading from a file descriptor
  403. @deftypefun ssize_t read (int @var{filedes}, void *@var{buffer}, size_t @var{size})
  404. @standards{POSIX.1, unistd.h}
  405. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  406. The @code{read} function reads up to @var{size} bytes from the file
  407. with descriptor @var{filedes}, storing the results in the @var{buffer}.
  408. (This is not necessarily a character string, and no terminating null
  409. character is added.)
  410. @cindex end-of-file, on a file descriptor
  411. The return value is the number of bytes actually read. This might be
  412. less than @var{size}; for example, if there aren't that many bytes left
  413. in the file or if there aren't that many bytes immediately available.
  414. The exact behavior depends on what kind of file it is. Note that
  415. reading less than @var{size} bytes is not an error.
  416. A value of zero indicates end-of-file (except if the value of the
  417. @var{size} argument is also zero). This is not considered an error.
  418. If you keep calling @code{read} while at end-of-file, it will keep
  419. returning zero and doing nothing else.
  420. If @code{read} returns at least one character, there is no way you can
  421. tell whether end-of-file was reached. But if you did reach the end, the
  422. next read will return zero.
  423. In case of an error, @code{read} returns @math{-1}. The following
  424. @code{errno} error conditions are defined for this function:
  425. @table @code
  426. @item EAGAIN
  427. Normally, when no input is immediately available, @code{read} waits for
  428. some input. But if the @code{O_NONBLOCK} flag is set for the file
  429. (@pxref{File Status Flags}), @code{read} returns immediately without
  430. reading any data, and reports this error.
  431. @strong{Compatibility Note:} Most versions of BSD Unix use a different
  432. error code for this: @code{EWOULDBLOCK}. In @theglibc{},
  433. @code{EWOULDBLOCK} is an alias for @code{EAGAIN}, so it doesn't matter
  434. which name you use.
  435. On some systems, reading a large amount of data from a character special
  436. file can also fail with @code{EAGAIN} if the kernel cannot find enough
  437. physical memory to lock down the user's pages. This is limited to
  438. devices that transfer with direct memory access into the user's memory,
  439. which means it does not include terminals, since they always use
  440. separate buffers inside the kernel. This problem never happens on
  441. @gnuhurdsystems{}.
  442. Any condition that could result in @code{EAGAIN} can instead result in a
  443. successful @code{read} which returns fewer bytes than requested.
  444. Calling @code{read} again immediately would result in @code{EAGAIN}.
  445. @item EBADF
  446. The @var{filedes} argument is not a valid file descriptor,
  447. or is not open for reading.
  448. @item EINTR
  449. @code{read} was interrupted by a signal while it was waiting for input.
  450. @xref{Interrupted Primitives}. A signal will not necessarily cause
  451. @code{read} to return @code{EINTR}; it may instead result in a
  452. successful @code{read} which returns fewer bytes than requested.
  453. @item EIO
  454. For many devices, and for disk files, this error code indicates
  455. a hardware error.
  456. @code{EIO} also occurs when a background process tries to read from the
  457. controlling terminal, and the normal action of stopping the process by
  458. sending it a @code{SIGTTIN} signal isn't working. This might happen if
  459. the signal is being blocked or ignored, or because the process group is
  460. orphaned. @xref{Job Control}, for more information about job control,
  461. and @ref{Signal Handling}, for information about signals.
  462. @item EINVAL
  463. In some systems, when reading from a character or block device, position
  464. and size offsets must be aligned to a particular block size. This error
  465. indicates that the offsets were not properly aligned.
  466. @end table
  467. Please note that there is no function named @code{read64}. This is not
  468. necessary since this function does not directly modify or handle the
  469. possibly wide file offset. Since the kernel handles this state
  470. internally, the @code{read} function can be used for all cases.
  471. This function is a cancellation point in multi-threaded programs. This
  472. is a problem if the thread allocates some resources (like memory, file
  473. descriptors, semaphores or whatever) at the time @code{read} is
  474. called. If the thread gets canceled these resources stay allocated
  475. until the program ends. To avoid this, calls to @code{read} should be
  476. protected using cancellation handlers.
  477. @c ref pthread_cleanup_push / pthread_cleanup_pop
  478. The @code{read} function is the underlying primitive for all of the
  479. functions that read from streams, such as @code{fgetc}.
  480. @end deftypefun
  481. @deftypefun ssize_t pread (int @var{filedes}, void *@var{buffer}, size_t @var{size}, off_t @var{offset})
  482. @standards{Unix98, unistd.h}
  483. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  484. @c This is usually a safe syscall. The sysdeps/posix fallback emulation
  485. @c is not MT-Safe because it uses lseek, read and lseek back, but is it
  486. @c used anywhere?
  487. The @code{pread} function is similar to the @code{read} function. The
  488. first three arguments are identical, and the return values and error
  489. codes also correspond.
  490. The difference is the fourth argument and its handling. The data block
  491. is not read from the current position of the file descriptor
  492. @code{filedes}. Instead the data is read from the file starting at
  493. position @var{offset}. The position of the file descriptor itself is
  494. not affected by the operation. The value is the same as before the call.
  495. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the
  496. @code{pread} function is in fact @code{pread64} and the type
  497. @code{off_t} has 64 bits, which makes it possible to handle files up to
  498. @twoexp{63} bytes in length.
  499. The return value of @code{pread} describes the number of bytes read.
  500. In the error case it returns @math{-1} like @code{read} does and the
  501. error codes are also the same, with these additions:
  502. @table @code
  503. @item EINVAL
  504. The value given for @var{offset} is negative and therefore illegal.
  505. @item ESPIPE
  506. The file descriptor @var{filedes} is associated with a pipe or a FIFO and
  507. this device does not allow positioning of the file pointer.
  508. @end table
  509. The function is an extension defined in the Single Unix Specification
  510. version 2.
  511. @end deftypefun
  512. @deftypefun ssize_t pread64 (int @var{filedes}, void *@var{buffer}, size_t @var{size}, off64_t @var{offset})
  513. @standards{Unix98, unistd.h}
  514. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  515. @c This is usually a safe syscall. The sysdeps/posix fallback emulation
  516. @c is not MT-Safe because it uses lseek64, read and lseek64 back, but is
  517. @c it used anywhere?
  518. This function is similar to the @code{pread} function. The difference
  519. is that the @var{offset} parameter is of type @code{off64_t} instead of
  520. @code{off_t} which makes it possible on 32 bit machines to address
  521. files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The
  522. file descriptor @code{filedes} must be opened using @code{open64} since
  523. otherwise the large offsets possible with @code{off64_t} will lead to
  524. errors with a descriptor in small file mode.
  525. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} on a
  526. 32 bit machine this function is actually available under the name
  527. @code{pread} and so transparently replaces the 32 bit interface.
  528. @end deftypefun
  529. @cindex writing to a file descriptor
  530. @deftypefun ssize_t write (int @var{filedes}, const void *@var{buffer}, size_t @var{size})
  531. @standards{POSIX.1, unistd.h}
  532. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  533. @c Some say write is thread-unsafe on Linux without O_APPEND. In the VFS layer
  534. @c the vfs_write() does no locking around the acquisition of a file offset and
  535. @c therefore multiple threads / kernel tasks may race and get the same offset
  536. @c resulting in data loss.
  537. @c
  538. @c See:
  539. @c http://thread.gmane.org/gmane.linux.kernel/397980
  540. @c http://lwn.net/Articles/180387/
  541. @c
  542. @c The counter argument is that POSIX only says that the write starts at the
  543. @c file position and that the file position is updated *before* the function
  544. @c returns. What that really means is that any expectation of atomic writes is
  545. @c strictly an invention of the interpretation of the reader. Data loss could
  546. @c happen if two threads start the write at the same time. Only writes that
  547. @c come after the return of another write are guaranteed to follow the other
  548. @c write.
  549. @c
  550. @c The other side of the coin is that POSIX goes on further to say in
  551. @c "2.9.7 Thread Interactions with Regular File Operations" that threads
  552. @c should never see interleaving sets of file operations, but it is insane
  553. @c to do anything like that because it kills performance, so you don't get
  554. @c those guarantees in Linux.
  555. @c
  556. @c So we mark it thread safe, it doesn't blow up, but you might lose
  557. @c data, and we don't strictly meet the POSIX requirements.
  558. @c
  559. @c The fix for file offsets racing was merged in 3.14, the commits were:
  560. @c 9c225f2655e36a470c4f58dbbc99244c5fc7f2d4, and
  561. @c d7a15f8d0777955986a2ab00ab181795cab14b01. Therefore after Linux 3.14 you
  562. @c should get mostly MT-safe writes.
  563. The @code{write} function writes up to @var{size} bytes from
  564. @var{buffer} to the file with descriptor @var{filedes}. The data in
  565. @var{buffer} is not necessarily a character string and a null character is
  566. output like any other character.
  567. The return value is the number of bytes actually written. This may be
  568. @var{size}, but can always be smaller. Your program should always call
  569. @code{write} in a loop, iterating until all the data is written.
  570. Once @code{write} returns, the data is enqueued to be written and can be
  571. read back right away, but it is not necessarily written out to permanent
  572. storage immediately. You can use @code{fsync} when you need to be sure
  573. your data has been permanently stored before continuing. (It is more
  574. efficient for the system to batch up consecutive writes and do them all
  575. at once when convenient. Normally they will always be written to disk
  576. within a minute or less.) Modern systems provide another function
  577. @code{fdatasync} which guarantees integrity only for the file data and
  578. is therefore faster.
  579. @c !!! xref fsync, fdatasync
  580. You can use the @code{O_FSYNC} open mode to make @code{write} always
  581. store the data to disk before returning; @pxref{Operating Modes}.
  582. In the case of an error, @code{write} returns @math{-1}. The following
  583. @code{errno} error conditions are defined for this function:
  584. @table @code
  585. @item EAGAIN
  586. Normally, @code{write} blocks until the write operation is complete.
  587. But if the @code{O_NONBLOCK} flag is set for the file (@pxref{Control
  588. Operations}), it returns immediately without writing any data and
  589. reports this error. An example of a situation that might cause the
  590. process to block on output is writing to a terminal device that supports
  591. flow control, where output has been suspended by receipt of a STOP
  592. character.
  593. @strong{Compatibility Note:} Most versions of BSD Unix use a different
  594. error code for this: @code{EWOULDBLOCK}. In @theglibc{},
  595. @code{EWOULDBLOCK} is an alias for @code{EAGAIN}, so it doesn't matter
  596. which name you use.
  597. On some systems, writing a large amount of data to a character special
  598. file can also fail with @code{EAGAIN} if the kernel cannot find enough
  599. physical memory to lock down the user's pages. This is limited to
  600. devices that transfer with direct memory access into the user's memory,
  601. which means it does not include terminals, since they always use
  602. separate buffers inside the kernel. This problem does not arise on
  603. @gnuhurdsystems{}.
  604. @item EBADF
  605. The @var{filedes} argument is not a valid file descriptor,
  606. or is not open for writing.
  607. @item EFBIG
  608. The size of the file would become larger than the implementation can support.
  609. @item EINTR
  610. The @code{write} operation was interrupted by a signal while it was
  611. blocked waiting for completion. A signal will not necessarily cause
  612. @code{write} to return @code{EINTR}; it may instead result in a
  613. successful @code{write} which writes fewer bytes than requested.
  614. @xref{Interrupted Primitives}.
  615. @item EIO
  616. For many devices, and for disk files, this error code indicates
  617. a hardware error.
  618. @item ENOSPC
  619. The device containing the file is full.
  620. @item EPIPE
  621. This error is returned when you try to write to a pipe or FIFO that
  622. isn't open for reading by any process. When this happens, a @code{SIGPIPE}
  623. signal is also sent to the process; see @ref{Signal Handling}.
  624. @item EINVAL
  625. In some systems, when writing to a character or block device, position
  626. and size offsets must be aligned to a particular block size. This error
  627. indicates that the offsets were not properly aligned.
  628. @end table
  629. Unless you have arranged to prevent @code{EINTR} failures, you should
  630. check @code{errno} after each failing call to @code{write}, and if the
  631. error was @code{EINTR}, you should simply repeat the call.
  632. @xref{Interrupted Primitives}. The easy way to do this is with the
  633. macro @code{TEMP_FAILURE_RETRY}, as follows:
  634. @smallexample
  635. nbytes = TEMP_FAILURE_RETRY (write (desc, buffer, count));
  636. @end smallexample
  637. Please note that there is no function named @code{write64}. This is not
  638. necessary since this function does not directly modify or handle the
  639. possibly wide file offset. Since the kernel handles this state
  640. internally the @code{write} function can be used for all cases.
  641. This function is a cancellation point in multi-threaded programs. This
  642. is a problem if the thread allocates some resources (like memory, file
  643. descriptors, semaphores or whatever) at the time @code{write} is
  644. called. If the thread gets canceled these resources stay allocated
  645. until the program ends. To avoid this, calls to @code{write} should be
  646. protected using cancellation handlers.
  647. @c ref pthread_cleanup_push / pthread_cleanup_pop
  648. The @code{write} function is the underlying primitive for all of the
  649. functions that write to streams, such as @code{fputc}.
  650. @end deftypefun
  651. @deftypefun ssize_t pwrite (int @var{filedes}, const void *@var{buffer}, size_t @var{size}, off_t @var{offset})
  652. @standards{Unix98, unistd.h}
  653. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  654. @c This is usually a safe syscall. The sysdeps/posix fallback emulation
  655. @c is not MT-Safe because it uses lseek, write and lseek back, but is it
  656. @c used anywhere?
  657. The @code{pwrite} function is similar to the @code{write} function. The
  658. first three arguments are identical, and the return values and error codes
  659. also correspond.
  660. The difference is the fourth argument and its handling. The data block
  661. is not written to the current position of the file descriptor
  662. @code{filedes}. Instead the data is written to the file starting at
  663. position @var{offset}. The position of the file descriptor itself is
  664. not affected by the operation. The value is the same as before the call.
  665. However, on Linux, if a file is opened with @code{O_APPEND}, @code{pwrite}
  666. appends data to the end of the file, regardless of the value of
  667. @var{offset}.
  668. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the
  669. @code{pwrite} function is in fact @code{pwrite64} and the type
  670. @code{off_t} has 64 bits, which makes it possible to handle files up to
  671. @twoexp{63} bytes in length.
  672. The return value of @code{pwrite} describes the number of written bytes.
  673. In the error case it returns @math{-1} like @code{write} does and the
  674. error codes are also the same, with these additions:
  675. @table @code
  676. @item EINVAL
  677. The value given for @var{offset} is negative and therefore illegal.
  678. @item ESPIPE
  679. The file descriptor @var{filedes} is associated with a pipe or a FIFO and
  680. this device does not allow positioning of the file pointer.
  681. @end table
  682. The function is an extension defined in the Single Unix Specification
  683. version 2.
  684. @end deftypefun
  685. @deftypefun ssize_t pwrite64 (int @var{filedes}, const void *@var{buffer}, size_t @var{size}, off64_t @var{offset})
  686. @standards{Unix98, unistd.h}
  687. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  688. @c This is usually a safe syscall. The sysdeps/posix fallback emulation
  689. @c is not MT-Safe because it uses lseek64, write and lseek64 back, but
  690. @c is it used anywhere?
  691. This function is similar to the @code{pwrite} function. The difference
  692. is that the @var{offset} parameter is of type @code{off64_t} instead of
  693. @code{off_t} which makes it possible on 32 bit machines to address
  694. files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The
  695. file descriptor @code{filedes} must be opened using @code{open64} since
  696. otherwise the large offsets possible with @code{off64_t} will lead to
  697. errors with a descriptor in small file mode.
  698. When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a
  699. 32 bit machine this function is actually available under the name
  700. @code{pwrite} and so transparently replaces the 32 bit interface.
  701. @end deftypefun
  702. @node File Position Primitive
  703. @section Setting the File Position of a Descriptor
  704. Just as you can set the file position of a stream with @code{fseek}, you
  705. can set the file position of a descriptor with @code{lseek}. This
  706. specifies the position in the file for the next @code{read} or
  707. @code{write} operation. @xref{File Positioning}, for more information
  708. on the file position and what it means.
  709. To read the current file position value from a descriptor, use
  710. @code{lseek (@var{desc}, 0, SEEK_CUR)}.
  711. @cindex file positioning on a file descriptor
  712. @cindex positioning a file descriptor
  713. @cindex seeking on a file descriptor
  714. @deftypefun off_t lseek (int @var{filedes}, off_t @var{offset}, int @var{whence})
  715. @standards{POSIX.1, unistd.h}
  716. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  717. The @code{lseek} function is used to change the file position of the
  718. file with descriptor @var{filedes}.
  719. The @var{whence} argument specifies how the @var{offset} should be
  720. interpreted, in the same way as for the @code{fseek} function, and it must
  721. be one of the symbolic constants @code{SEEK_SET}, @code{SEEK_CUR}, or
  722. @code{SEEK_END}.
  723. @vtable @code
  724. @item SEEK_SET
  725. Specifies that @var{offset} is a count of characters from the beginning
  726. of the file.
  727. @item SEEK_CUR
  728. Specifies that @var{offset} is a count of characters from the current
  729. file position. This count may be positive or negative.
  730. @item SEEK_END
  731. Specifies that @var{offset} is a count of characters from the end of
  732. the file. A negative count specifies a position within the current
  733. extent of the file; a positive count specifies a position past the
  734. current end. If you set the position past the current end, and
  735. actually write data, you will extend the file with zeros up to that
  736. position.
  737. @end vtable
  738. The return value from @code{lseek} is normally the resulting file
  739. position, measured in bytes from the beginning of the file.
  740. You can use this feature together with @code{SEEK_CUR} to read the
  741. current file position.
  742. If you want to append to the file, setting the file position to the
  743. current end of file with @code{SEEK_END} is not sufficient. Another
  744. process may write more data after you seek but before you write,
  745. extending the file so the position you write onto clobbers their data.
  746. Instead, use the @code{O_APPEND} operating mode; @pxref{Operating Modes}.
  747. You can set the file position past the current end of the file. This
  748. does not by itself make the file longer; @code{lseek} never changes the
  749. file. But subsequent output at that position will extend the file.
  750. Characters between the previous end of file and the new position are
  751. filled with zeros. Extending the file in this way can create a
  752. ``hole'': the blocks of zeros are not actually allocated on disk, so the
  753. file takes up less space than it appears to; it is then called a
  754. ``sparse file''.
  755. @cindex sparse files
  756. @cindex holes in files
  757. If the file position cannot be changed, or the operation is in some way
  758. invalid, @code{lseek} returns a value of @math{-1}. The following
  759. @code{errno} error conditions are defined for this function:
  760. @table @code
  761. @item EBADF
  762. The @var{filedes} is not a valid file descriptor.
  763. @item EINVAL
  764. The @var{whence} argument value is not valid, or the resulting
  765. file offset is not valid. A file offset is invalid if, for example, it would be negative for a regular file.
  766. @item ESPIPE
  767. The @var{filedes} corresponds to an object that cannot be positioned,
  768. such as a pipe, FIFO or terminal device. (POSIX.1 specifies this error
  769. only for pipes and FIFOs, but on @gnusystems{}, you always get
  770. @code{ESPIPE} if the object is not seekable.)
  771. @end table
  772. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the
  773. @code{lseek} function is in fact @code{lseek64} and the type
  774. @code{off_t} has 64 bits which makes it possible to handle files up to
  775. @twoexp{63} bytes in length.
  776. This function is a cancellation point in multi-threaded programs. This
  777. is a problem if the thread allocates some resources (like memory, file
  778. descriptors, semaphores or whatever) at the time @code{lseek} is
  779. called. If the thread gets canceled these resources stay allocated
  780. until the program ends. To avoid this, calls to @code{lseek} should be
  781. protected using cancellation handlers.
  782. @c ref pthread_cleanup_push / pthread_cleanup_pop
  783. The @code{lseek} function is the underlying primitive for the
  784. @code{fseek}, @code{fseeko}, @code{ftell}, @code{ftello} and
  785. @code{rewind} functions, which operate on streams instead of file
  786. descriptors.
  787. @end deftypefun
  788. @deftypefun off64_t lseek64 (int @var{filedes}, off64_t @var{offset}, int @var{whence})
  789. @standards{Unix98, unistd.h}
  790. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  791. This function is similar to the @code{lseek} function. The difference
  792. is that the @var{offset} parameter is of type @code{off64_t} instead of
  793. @code{off_t} which makes it possible on 32 bit machines to address
  794. files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The
  795. file descriptor @code{filedes} must be opened using @code{open64} since
  796. otherwise the large offsets possible with @code{off64_t} will lead to
  797. errors with a descriptor in small file mode.
  798. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} on a
  799. 32 bit machine this function is actually available under the name
  800. @code{lseek} and so transparently replaces the 32 bit interface.
  801. @end deftypefun
  802. You can have multiple descriptors for the same file if you open the file
  803. more than once, or if you duplicate a descriptor with @code{dup}.
  804. Descriptors that come from separate calls to @code{open} have independent
  805. file positions; using @code{lseek} on one descriptor has no effect on the
  806. other. For example,
  807. @smallexample
  808. @group
  809. @{
  810. int d1, d2;
  811. char buf[4];
  812. d1 = open ("foo", O_RDONLY);
  813. d2 = open ("foo", O_RDONLY);
  814. lseek (d1, 1024, SEEK_SET);
  815. read (d2, buf, 4);
  816. @}
  817. @end group
  818. @end smallexample
  819. @noindent
  820. will read the first four characters of the file @file{foo}. (The
  821. error-checking code necessary for a real program has been omitted here
  822. for brevity.)
  823. By contrast, descriptors made by duplication share a common file
  824. position with the original descriptor that was duplicated. Anything
  825. which alters the file position of one of the duplicates, including
  826. reading or writing data, affects all of them alike. Thus, for example,
  827. @smallexample
  828. @{
  829. int d1, d2, d3;
  830. char buf1[4], buf2[4];
  831. d1 = open ("foo", O_RDONLY);
  832. d2 = dup (d1);
  833. d3 = dup (d2);
  834. lseek (d3, 1024, SEEK_SET);
  835. read (d1, buf1, 4);
  836. read (d2, buf2, 4);
  837. @}
  838. @end smallexample
  839. @noindent
  840. will read four characters starting with the 1024'th character of
  841. @file{foo}, and then four more characters starting with the 1028'th
  842. character.
  843. @deftp {Data Type} off_t
  844. @standards{POSIX.1, sys/types.h}
  845. This is a signed integer type used to represent file sizes. In
  846. @theglibc{}, this type is no narrower than @code{int}.
  847. If the source is compiled with @code{_FILE_OFFSET_BITS == 64} this type
  848. is transparently replaced by @code{off64_t}.
  849. @end deftp
  850. @deftp {Data Type} off64_t
  851. @standards{Unix98, sys/types.h}
  852. This type is used similarly to @code{off_t}. The difference is that even
  853. on 32 bit machines, where the @code{off_t} type would have 32 bits,
  854. @code{off64_t} has 64 bits and so is able to address files up to
  855. @twoexp{63} bytes in length.
  856. When compiling with @code{_FILE_OFFSET_BITS == 64} this type is
  857. available under the name @code{off_t}.
  858. @end deftp
  859. These aliases for the @samp{SEEK_@dots{}} constants exist for the sake
  860. of compatibility with older BSD systems. They are defined in two
  861. different header files: @file{fcntl.h} and @file{sys/file.h}.
  862. @vtable @code
  863. @item L_SET
  864. An alias for @code{SEEK_SET}.
  865. @item L_INCR
  866. An alias for @code{SEEK_CUR}.
  867. @item L_XTND
  868. An alias for @code{SEEK_END}.
  869. @end vtable
  870. @node Descriptors and Streams
  871. @section Descriptors and Streams
  872. @cindex streams, and file descriptors
  873. @cindex converting file descriptor to stream
  874. @cindex extracting file descriptor from stream
  875. Given an open file descriptor, you can create a stream for it with the
  876. @code{fdopen} function. You can get the underlying file descriptor for
  877. an existing stream with the @code{fileno} function. These functions are
  878. declared in the header file @file{stdio.h}.
  879. @pindex stdio.h
  880. @deftypefun {FILE *} fdopen (int @var{filedes}, const char *@var{opentype})
  881. @standards{POSIX.1, stdio.h}
  882. @safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @aculock{}}}
  883. The @code{fdopen} function returns a new stream for the file descriptor
  884. @var{filedes}.
  885. The @var{opentype} argument is interpreted in the same way as for the
  886. @code{fopen} function (@pxref{Opening Streams}), except that
  887. the @samp{b} option is not permitted; this is because @gnusystems{} make no
  888. distinction between text and binary files. Also, @code{"w"} and
  889. @code{"w+"} do not cause truncation of the file; these have an effect only
  890. when opening a file, and in this case the file has already been opened.
  891. You must make sure that the @var{opentype} argument matches the actual
  892. mode of the open file descriptor.
  893. The return value is the new stream. If the stream cannot be created
  894. (for example, if the modes for the file indicated by the file descriptor
  895. do not permit the access specified by the @var{opentype} argument), a
  896. null pointer is returned instead.
  897. In some other systems, @code{fdopen} may fail to detect that the modes
  898. for file descriptors do not permit the access specified by
  899. @code{opentype}. @Theglibc{} always checks for this.
  900. @end deftypefun
  901. For an example showing the use of the @code{fdopen} function,
  902. see @ref{Creating a Pipe}.
  903. @deftypefun int fileno (FILE *@var{stream})
  904. @standards{POSIX.1, stdio.h}
  905. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  906. This function returns the file descriptor associated with the stream
  907. @var{stream}. If an error is detected (for example, if the @var{stream}
  908. is not valid) or if @var{stream} does not do I/O to a file,
  909. @code{fileno} returns @math{-1}.
  910. @end deftypefun
  911. @deftypefun int fileno_unlocked (FILE *@var{stream})
  912. @standards{GNU, stdio.h}
  913. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  914. The @code{fileno_unlocked} function is equivalent to the @code{fileno}
  915. function except that it does not implicitly lock the stream if the state
  916. is @code{FSETLOCKING_INTERNAL}.
  917. This function is a GNU extension.
  918. @end deftypefun
  919. @cindex standard file descriptors
  920. @cindex file descriptors, standard
  921. There are also symbolic constants defined in @file{unistd.h} for the
  922. file descriptors belonging to the standard streams @code{stdin},
  923. @code{stdout}, and @code{stderr}; see @ref{Standard Streams}.
  924. @pindex unistd.h
  925. @vtable @code
  926. @item STDIN_FILENO
  927. @standards{POSIX.1, unistd.h}
  928. This macro has value @code{0}, which is the file descriptor for
  929. standard input.
  930. @cindex standard input file descriptor
  931. @item STDOUT_FILENO
  932. @standards{POSIX.1, unistd.h}
  933. This macro has value @code{1}, which is the file descriptor for
  934. standard output.
  935. @cindex standard output file descriptor
  936. @item STDERR_FILENO
  937. @standards{POSIX.1, unistd.h}
  938. This macro has value @code{2}, which is the file descriptor for
  939. standard error output.
  940. @end vtable
  941. @cindex standard error file descriptor
  942. @node Stream/Descriptor Precautions
  943. @section Dangers of Mixing Streams and Descriptors
  944. @cindex channels
  945. @cindex streams and descriptors
  946. @cindex descriptors and streams
  947. @cindex mixing descriptors and streams
  948. You can have multiple file descriptors and streams (let's call both
  949. streams and descriptors ``channels'' for short) connected to the same
  950. file, but you must take care to avoid confusion between channels. There
  951. are two cases to consider: @dfn{linked} channels that share a single
  952. file position value, and @dfn{independent} channels that have their own
  953. file positions.
  954. It's best to use just one channel in your program for actual data
  955. transfer to any given file, except when all the access is for input.
  956. For example, if you open a pipe (something you can only do at the file
  957. descriptor level), either do all I/O with the descriptor, or construct a
  958. stream from the descriptor with @code{fdopen} and then do all I/O with
  959. the stream.
  960. @menu
  961. * Linked Channels:: Dealing with channels sharing a file position.
  962. * Independent Channels:: Dealing with separately opened, unlinked channels.
  963. * Cleaning Streams:: Cleaning a stream makes it safe to use
  964. another channel.
  965. @end menu
  966. @node Linked Channels
  967. @subsection Linked Channels
  968. @cindex linked channels
  969. Channels that come from a single opening share the same file position;
  970. we call them @dfn{linked} channels. Linked channels result when you
  971. make a stream from a descriptor using @code{fdopen}, when you get a
  972. descriptor from a stream with @code{fileno}, when you copy a descriptor
  973. with @code{dup} or @code{dup2}, and when descriptors are inherited
  974. during @code{fork}. For files that don't support random access, such as
  975. terminals and pipes, @emph{all} channels are effectively linked. On
  976. random-access files, all append-type output streams are effectively
  977. linked to each other.
  978. @cindex cleaning up a stream
  979. If you have been using a stream for I/O (or have just opened the stream),
  980. and you want to do I/O using
  981. another channel (either a stream or a descriptor) that is linked to it,
  982. you must first @dfn{clean up} the stream that you have been using.
  983. @xref{Cleaning Streams}.
  984. Terminating a process, or executing a new program in the process,
  985. destroys all the streams in the process. If descriptors linked to these
  986. streams persist in other processes, their file positions become
  987. undefined as a result. To prevent this, you must clean up the streams
  988. before destroying them.
  989. In addition to cleaning up a stream before doing I/O using another
  990. linked channel, additional precautions are needed to ensure a
  991. well-defined file position indicator in some cases. If both the
  992. following conditions hold, you must set the file position indicator on
  993. the new channel (a stream) using a function such as @code{fseek}.
  994. @itemize @bullet
  995. @item
  996. The new linked channel is a stream that was previously active.
  997. @item
  998. The file position indicator was previously set on that channel (while
  999. it was previously active) with a function such as @code{fseek}.
  1000. @end itemize
  1001. POSIX requires such precautions in more cases: if either the old or
  1002. the new linked channel is a stream (whether or not previously active)
  1003. and the file position indicator was previously set on any channel
  1004. linked to those channels with a function such as @code{fseek} or
  1005. @code{lseek}.
  1006. @node Independent Channels
  1007. @subsection Independent Channels
  1008. @cindex independent channels
  1009. When you open channels (streams or descriptors) separately on a seekable
  1010. file, each channel has its own file position. These are called
  1011. @dfn{independent channels}.
  1012. The system handles each channel independently. Most of the time, this
  1013. is quite predictable and natural (especially for input): each channel
  1014. can read or write sequentially at its own place in the file. However,
  1015. if some of the channels are streams, you must take these precautions:
  1016. @itemize @bullet
  1017. @item
  1018. You should clean an output stream after use, before doing anything else
  1019. that might read or write from the same part of the file.
  1020. @item
  1021. You should clean an input stream before reading data that may have been
  1022. modified using an independent channel. Otherwise, you might read
  1023. obsolete data that had been in the stream's buffer.
  1024. @end itemize
  1025. If you do output to one channel at the end of the file, this will
  1026. certainly leave the other independent channels positioned somewhere
  1027. before the new end. You cannot reliably set their file positions to the
  1028. new end of file before writing, because the file can always be extended
  1029. by another process between when you set the file position and when you
  1030. write the data. Instead, use an append-type descriptor or stream; they
  1031. always output at the current end of the file. In order to make the
  1032. end-of-file position accurate, you must clean the output channel you
  1033. were using, if it is a stream.
  1034. It's impossible for two channels to have separate file pointers for a
  1035. file that doesn't support random access. Thus, channels for reading or
  1036. writing such files are always linked, never independent. Append-type
  1037. channels are also always linked. For these channels, follow the rules
  1038. for linked channels; see @ref{Linked Channels}.
  1039. @node Cleaning Streams
  1040. @subsection Cleaning Streams
  1041. You can use @code{fflush} to clean a stream in most
  1042. cases.
  1043. You can skip the @code{fflush} if you know the stream
  1044. is already clean. A stream is clean whenever its buffer is empty. For
  1045. example, an unbuffered stream is always clean. An input stream that is
  1046. at end-of-file is clean. A line-buffered stream is clean when the last
  1047. character output was a newline. However, a just-opened input stream
  1048. might not be clean, as its input buffer might not be empty.
  1049. There is one case in which cleaning a stream is impossible on most
  1050. systems. This is when the stream is doing input from a file that is not
  1051. random-access. Such streams typically read ahead, and when the file is
  1052. not random access, there is no way to give back the excess data already
  1053. read. When an input stream reads from a random-access file,
  1054. @code{fflush} does clean the stream, but leaves the file pointer at an
  1055. unpredictable place; you must set the file pointer before doing any
  1056. further I/O.
  1057. Closing an output-only stream also does @code{fflush}, so this is a
  1058. valid way of cleaning an output stream.
  1059. You need not clean a stream before using its descriptor for control
  1060. operations such as setting terminal modes; these operations don't affect
  1061. the file position and are not affected by it. You can use any
  1062. descriptor for these operations, and all channels are affected
  1063. simultaneously. However, text already ``output'' to a stream but still
  1064. buffered by the stream will be subject to the new terminal modes when
  1065. subsequently flushed. To make sure ``past'' output is covered by the
  1066. terminal settings that were in effect at the time, flush the output
  1067. streams for that terminal before setting the modes. @xref{Terminal
  1068. Modes}.
  1069. @node Scatter-Gather
  1070. @section Fast Scatter-Gather I/O
  1071. @cindex scatter-gather
  1072. Some applications may need to read or write data to multiple buffers,
  1073. which are separated in memory. Although this can be done easily enough
  1074. with multiple calls to @code{read} and @code{write}, it is inefficient
  1075. because there is overhead associated with each kernel call.
  1076. Instead, many platforms provide special high-speed primitives to perform
  1077. these @dfn{scatter-gather} operations in a single kernel call. @Theglibc{}
  1078. will provide an emulation on any system that lacks these
  1079. primitives, so they are not a portability threat. They are defined in
  1080. @code{sys/uio.h}.
  1081. These functions are controlled with arrays of @code{iovec} structures,
  1082. which describe the location and size of each buffer.
  1083. @deftp {Data Type} {struct iovec}
  1084. @standards{BSD, sys/uio.h}
  1085. The @code{iovec} structure describes a buffer. It contains two fields:
  1086. @table @code
  1087. @item void *iov_base
  1088. Contains the address of a buffer.
  1089. @item size_t iov_len
  1090. Contains the length of the buffer.
  1091. @end table
  1092. @end deftp
  1093. @deftypefun ssize_t readv (int @var{filedes}, const struct iovec *@var{vector}, int @var{count})
  1094. @standards{BSD, sys/uio.h}
  1095. @safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}}
  1096. @c The fallback sysdeps/posix implementation, used even on GNU/Linux
  1097. @c with old kernels that lack a full readv/writev implementation, may
  1098. @c malloc the buffer into which data is read, if the total read size is
  1099. @c too large for alloca.
  1100. The @code{readv} function reads data from @var{filedes} and scatters it
  1101. into the buffers described in @var{vector}, which is taken to be
  1102. @var{count} structures long. As each buffer is filled, data is sent to the
  1103. next.
  1104. Note that @code{readv} is not guaranteed to fill all the buffers.
  1105. It may stop at any point, for the same reasons @code{read} would.
  1106. The return value is a count of bytes (@emph{not} buffers) read, @math{0}
  1107. indicating end-of-file, or @math{-1} indicating an error. The possible
  1108. errors are the same as in @code{read}.
  1109. @end deftypefun
  1110. @deftypefun ssize_t writev (int @var{filedes}, const struct iovec *@var{vector}, int @var{count})
  1111. @standards{BSD, sys/uio.h}
  1112. @safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}}
  1113. @c The fallback sysdeps/posix implementation, used even on GNU/Linux
  1114. @c with old kernels that lack a full readv/writev implementation, may
  1115. @c malloc the buffer from which data is written, if the total write size
  1116. @c is too large for alloca.
  1117. The @code{writev} function gathers data from the buffers described in
  1118. @var{vector}, which is taken to be @var{count} structures long, and writes
  1119. them to @code{filedes}. As each buffer is written, it moves on to the
  1120. next.
  1121. Like @code{readv}, @code{writev} may stop midstream under the same
  1122. conditions @code{write} would.
  1123. The return value is a count of bytes written, or @math{-1} indicating an
  1124. error. The possible errors are the same as in @code{write}.
  1125. @end deftypefun
  1126. @deftypefun ssize_t preadv (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off_t @var{offset})
  1127. @standards{BSD, sys/uio.h}
  1128. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1129. @c This is a syscall for Linux 3.2 for all architectures but microblaze
  1130. @c (which was added on 3.15). The sysdeps/posix fallback emulation
  1131. @c is also MT-Safe since it calls pread, and it is now a syscall on all
  1132. @c targets.
  1133. This function is similar to the @code{readv} function, with the difference
  1134. that it adds an extra @var{offset} parameter of type @code{off_t} similar to
  1135. @code{pread}. The data is read from the file starting at position
  1136. @var{offset}. The position of the file descriptor itself is not affected
  1137. by the operation. The value is the same as before the call.
  1138. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the
  1139. @code{preadv} function is in fact @code{preadv64} and the type
  1140. @code{off_t} has 64 bits, which makes it possible to handle files up to
  1141. @twoexp{63} bytes in length.
  1142. The return value is a count of bytes (@emph{not} buffers) read, @math{0}
  1143. indicating end-of-file, or @math{-1} indicating an error. The possible
  1144. errors are the same as in @code{readv} and @code{pread}.
  1145. @end deftypefun
  1146. @deftypefun ssize_t preadv64 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off64_t @var{offset})
  1147. @standards{BSD, unistd.h}
  1148. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1149. @c This is a syscall for Linux 3.2 for all architectures but microblaze
  1150. @c (which was added on 3.15). The sysdeps/posix fallback emulation
  1151. @c is also MT-Safe since it calls pread64, and it is now a syscall on all
  1152. @c targets.
  1153. This function is similar to the @code{preadv} function, with the difference
  1154. that the @var{offset} parameter is of type @code{off64_t} instead of
  1155. @code{off_t}. It makes it possible on 32 bit machines to address
  1156. files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The
  1157. file descriptor @var{fd} must be opened using @code{open64} since
  1158. otherwise the large offsets possible with @code{off64_t} will lead to
  1159. errors with a descriptor in small file mode.
  1160. When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a
  1161. 32 bit machine this function is actually available under the name
  1162. @code{preadv} and so transparently replaces the 32 bit interface.
  1163. @end deftypefun
  1164. @deftypefun ssize_t pwritev (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off_t @var{offset})
  1165. @standards{BSD, sys/uio.h}
  1166. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1167. @c This is a syscall for Linux 3.2 for all architectures but microblaze
  1168. @c (which was added on 3.15). The sysdeps/posix fallback emulation
  1169. @c is also MT-Safe since it calls pwrite, and it is now a syscall on all
  1170. @c targets.
  1171. This function is similar to the @code{writev} function, with the difference
  1172. that it adds an extra @var{offset} parameter of type @code{off_t} similar to
  1173. @code{pwrite}. The data is written to the file starting at position
  1174. @var{offset}. The position of the file descriptor itself is not affected
  1175. by the operation. The value is the same as before the call.
  1176. However, on Linux, if a file is opened with @code{O_APPEND}, @code{pwritev}
  1177. (like @code{pwrite}) appends data to the end of the file, regardless of the value of
  1178. @var{offset}.
  1179. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the
  1180. @code{pwritev} function is in fact @code{pwritev64} and the type
  1181. @code{off_t} has 64 bits, which makes it possible to handle files up to
  1182. @twoexp{63} bytes in length.
  1183. The return value is a count of bytes (@emph{not} buffers) written,
  1184. or @math{-1} indicating an error. The possible
  1185. errors are the same as in @code{writev} and @code{pwrite}.
  1186. @end deftypefun
  1187. @deftypefun ssize_t pwritev64 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off64_t @var{offset})
  1188. @standards{BSD, unistd.h}
  1189. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1190. @c This is a syscall for Linux 3.2 for all architectures but microblaze
  1191. @c (which was added on 3.15). The sysdeps/posix fallback emulation
  1192. @c is also MT-Safe since it calls pwrite64, and it is now a syscall on all
  1193. @c targets.
  1194. This function is similar to the @code{pwritev} function, with the difference
  1195. that the @var{offset} parameter is of type @code{off64_t} instead of
  1196. @code{off_t}. It makes it possible on 32 bit machines to address
  1197. files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The
  1198. file descriptor @var{fd} must be opened using @code{open64} since
  1199. otherwise the large offsets possible with @code{off64_t} will lead to
  1200. errors with a descriptor in small file mode.
  1201. When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a
  1202. 32 bit machine this function is actually available under the name
  1203. @code{pwritev} and so transparently replaces the 32 bit interface.
  1204. @end deftypefun
  1205. @deftypefun ssize_t preadv2 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off_t @var{offset}, int @var{flags})
  1206. @standards{GNU, sys/uio.h}
  1207. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1208. @c This is a syscall for Linux v4.6. The sysdeps/posix fallback emulation
  1209. @c is also MT-Safe since it calls preadv.
  1210. This function is similar to the @code{preadv} function, with the
  1211. difference that it adds an extra @var{flags} parameter of type @code{int}.
  1212. Additionally, if @var{offset} is @math{-1}, the current file position
  1213. is used and updated (like the @code{readv} function).
  1214. The supported @var{flags} depend on the underlying system. On
  1215. Linux, the following flags are supported:
  1216. @vtable @code
  1217. @item RWF_HIPRI
  1218. High priority request. This adds a flag that tells the file system that
  1219. this is a high priority request for which it is worth polling the hardware.
  1220. The flag is purely advisory and can be ignored if not supported. The
  1221. @var{fd} must be opened using @code{O_DIRECT}.
  1222. @item RWF_DSYNC
  1223. Per-IO synchronization as if the file was opened with @code{O_DSYNC} flag.
  1224. @item RWF_SYNC
  1225. Per-IO synchronization as if the file was opened with @code{O_SYNC} flag.
  1226. @item RWF_NOWAIT
  1227. Use nonblocking mode for this operation; that is, this call to @code{preadv2}
  1228. will fail and set @code{errno} to @code{EAGAIN} if the operation would block.
  1229. @item RWF_APPEND
  1230. Per-IO append: data is appended to the end of the file, as if the file was opened with the @code{O_APPEND} flag.
  1231. @item RWF_NOAPPEND
  1232. This flag allows an offset to be honored, even if the file was opened with
  1233. @code{O_APPEND} flag.
  1234. @item RWF_ATOMIC
  1235. Indicate that the write is to be issued with torn-write prevention. The
  1236. input buffer should follow some constraints: the total length should be
  1237. a power of 2 and lie between @code{atomic_write_unit_min}
  1238. and @code{atomic_write_unit_max}, the @code{struct iovec} count should be
  1239. at most @code{atomic_write_segments_max}, and the offset should be
  1240. naturally aligned with regard to the total write length.
  1241. The @code{atomic_*} values can be obtained with @code{statx} along with
  1242. @code{STATX_WRITE_ATOMIC} flag.
  1243. @item RWF_DONTCACHE
  1244. Use uncached buffered IO. If the file system does not support it, the
  1245. call will fail and set @code{errno} to @code{EOPNOTSUPP}.
  1246. This is a Linux-specific extension.
  1247. @end vtable
  1248. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the
  1249. @code{preadv2} function is in fact @code{preadv64v2} and the type
  1250. @code{off_t} has 64 bits, which makes it possible to handle files up to
  1251. @twoexp{63} bytes in length.
  1252. The return value is a count of bytes (@emph{not} buffers) read, @math{0}
  1253. indicating end-of-file, or @math{-1} indicating an error. The possible
  1254. errors are the same as in @code{preadv} with the addition of:
  1255. @table @code
  1256. @item EOPNOTSUPP
  1257. @c The default sysdeps/posix code will return it for any flags value
  1258. @c different than 0.
  1259. An unsupported @var{flags} was used.
  1260. @end table
  1261. @end deftypefun
  1262. @deftypefun ssize_t preadv64v2 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off64_t @var{offset}, int @var{flags})
  1263. @standards{GNU, unistd.h}
  1264. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1265. @c This is a syscall for Linux v4.6. The sysdeps/posix fallback emulation
  1266. @c is also MT-Safe since it calls preadv.
  1267. This function is similar to the @code{preadv2} function, with the difference
  1268. that the @var{offset} parameter is of type @code{off64_t} instead of
  1269. @code{off_t}. It makes it possible on 32 bit machines to address
  1270. files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The
  1271. file descriptor @var{fd} must be opened using @code{open64} since
  1272. otherwise the large offsets possible with @code{off64_t} will lead to
  1273. errors with a descriptor in small file mode.
  1274. When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a
  1275. 32 bit machine this function is actually available under the name
  1276. @code{preadv2} and so transparently replaces the 32 bit interface.
  1277. @end deftypefun
  1278. @deftypefun ssize_t pwritev2 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off_t @var{offset}, int @var{flags})
  1279. @standards{GNU, sys/uio.h}
  1280. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1281. @c This is a syscall for Linux v4.6. The sysdeps/posix fallback emulation
  1282. @c is also MT-Safe since it calls pwritev.
  1283. This function is similar to the @code{pwritev} function, with the
  1284. difference that it adds an extra @var{flags} parameter of type @code{int}.
  1285. Additionally, if @var{offset} is @math{-1}, the current file position
  1286. is used and updated (like the @code{writev} function).
  1287. The supported @var{flags} depend on the underlying system. For
  1288. Linux, the supported flags are the same as those for @code{preadv2}.
  1289. When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the
  1290. @code{pwritev2} function is in fact @code{pwritev64v2} and the type
  1291. @code{off_t} has 64 bits, which makes it possible to handle files up to
  1292. @twoexp{63} bytes in length.
  1293. The return value is a count of bytes (@emph{not} buffers) written,
  1294. or @math{-1} indicating an error. The possible
  1295. errors are the same as in @code{preadv2}.
  1296. @end deftypefun
  1297. @deftypefun ssize_t pwritev64v2 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off64_t @var{offset}, int @var{flags})
  1298. @standards{GNU, unistd.h}
  1299. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1300. @c This is a syscall for Linux v4.6. The sysdeps/posix fallback emulation
  1301. @c is also MT-Safe since it calls pwritev.
  1302. This function is similar to the @code{pwritev2} function, with the difference
  1303. that the @var{offset} parameter is of type @code{off64_t} instead of
  1304. @code{off_t}. It makes it possible on 32 bit machines to address
  1305. files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The
  1306. file descriptor @var{fd} must be opened using @code{open64} since
  1307. otherwise the large offsets possible with @code{off64_t} will lead to
  1308. errors with a descriptor in small file mode.
  1309. When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a
  1310. 32 bit machine this function is actually available under the name
  1311. @code{pwritev2} and so transparently replaces the 32 bit interface.
  1312. @end deftypefun
  1313. @node Copying File Data
  1314. @section Copying data between two files
  1315. @cindex copying files
  1316. @cindex file copy
  1317. A special function is provided to copy data between two files on the
  1318. same file system. The system can optimize such copy operations. This
  1319. is particularly important on network file systems, where the data would
  1320. otherwise have to be transferred twice over the network.
  1321. Note that this function only copies file data, but not metadata such as
  1322. file permissions or extended attributes.
  1323. @deftypefun ssize_t copy_file_range (int @var{inputfd}, off64_t *@var{inputpos}, int @var{outputfd}, off64_t *@var{outputpos}, size_t @var{length}, unsigned int @var{flags})
  1324. @standards{GNU, unistd.h}
  1325. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1326. This function copies up to @var{length} bytes from the file descriptor
  1327. @var{inputfd} to the file descriptor @var{outputfd}.
  1328. The function can operate on both the current file position (like
  1329. @code{read} and @code{write}) and an explicit offset (like @code{pread}
  1330. and @code{pwrite}). If the @var{inputpos} pointer is null, the file
  1331. position of @var{inputfd} is used as the starting point of the copy
  1332. operation, and the file position is advanced during it. If
  1333. @var{inputpos} is not null, then @code{*@var{inputpos}} is used as the
  1334. starting point of the copy operation, and @code{*@var{inputpos}} is
  1335. incremented by the number of copied bytes, but the file position remains
  1336. unchanged. Similar rules apply to @var{outputfd} and @var{outputpos}
  1337. for the output file position.
  1338. The @var{flags} argument is currently reserved and must be zero.
  1339. The @code{copy_file_range} function returns the number of bytes copied.
  1340. This can be less than the specified @var{length} in case the input file
  1341. contains fewer remaining bytes than @var{length}, or if a read or write
  1342. failure occurs. The return value is zero if the end of the input file
  1343. is encountered immediately.
  1344. If no bytes can be copied, to report an error, @code{copy_file_range}
  1345. returns the value @math{-1} and sets @code{errno}. The table below
  1346. lists some of the error conditions for this function.
  1347. @table @code
  1348. @item ENOSYS
  1349. The kernel does not implement the required functionality.
  1350. @item EISDIR
  1351. At least one of the descriptors @var{inputfd} or @var{outputfd} refers
  1352. to a directory.
  1353. @item EINVAL
  1354. At least one of the descriptors @var{inputfd} or @var{outputfd} refers
  1355. to a non-regular, non-directory file (such as a socket or a FIFO).
  1356. The input or output positions before or after the copy operations are
  1357. outside of an implementation-defined limit.
  1358. The @var{flags} argument is not zero.
  1359. @item EFBIG
  1360. The new file size would exceed the process file size limit.
  1361. @xref{Limits on Resources}.
  1362. The input or output positions before or after the copy operations are
  1363. outside of an implementation-defined limit. This can happen if the file
  1364. was not opened with large file support (LFS) on 32-bit machines, and the
  1365. copy operation would create a file which is larger than what
  1366. @code{off_t} could represent.
  1367. @item EBADF
  1368. The argument @var{inputfd} is not a valid file descriptor open for
  1369. reading.
  1370. The argument @var{outputfd} is not a valid file descriptor open for
  1371. writing, or @var{outputfd} has been opened with @code{O_APPEND}.
  1372. @end table
  1373. In addition, @code{copy_file_range} can fail with the error codes
  1374. which are used by @code{read}, @code{pread}, @code{write}, and
  1375. @code{pwrite}.
  1376. The @code{copy_file_range} function is a cancellation point. In case of
  1377. cancellation, the input location (the file position or the value at
  1378. @code{*@var{inputpos}}) is indeterminate.
  1379. @end deftypefun
  1380. @node Memory-mapped I/O
  1381. @section Memory-mapped I/O
  1382. On modern operating systems, it is possible to @dfn{mmap} (pronounced
  1383. ``em-map'') a file to a region of memory. When this is done, the file can
  1384. be accessed just like an array in the program.
  1385. This is more efficient than @code{read} or @code{write}, as only the regions
  1386. of the file that a program actually accesses are loaded. Accesses to
  1387. not-yet-loaded parts of the mmapped region are handled in the same way as
  1388. swapped out pages.
  1389. Since mmapped pages can be stored back to their file when physical
  1390. memory is low, it is possible to mmap files orders of magnitude larger
  1391. than both the physical memory @emph{and} swap space. The only limit is
  1392. address space. The theoretical limit is 4GB on a 32-bit machine -
  1393. however, the actual limit will be smaller since some areas will be
  1394. reserved for other purposes. If the LFS interface is used the file size
  1395. on 32-bit systems is not limited to 2GB (offsets are signed which
  1396. reduces the addressable area of 4GB by half); the full 64 bits are
  1397. available.
  1398. Memory mapping only works on entire pages of memory. Thus, addresses
  1399. for mapping must be page-aligned, and length values will be rounded up.
  1400. To determine the default size of a page the machine uses one should use:
  1401. @vindex _SC_PAGESIZE
  1402. @smallexample
  1403. size_t page_size = (size_t) sysconf (_SC_PAGESIZE);
  1404. @end smallexample
  1405. On some systems, mappings can use larger page sizes
  1406. for certain files, and applications can request larger page sizes for
  1407. anonymous mappings as well (see the @code{MAP_HUGETLB} flag below).
  1408. The following functions are declared in @file{sys/mman.h}:
  1409. @deftypefun {void *} mmap (void *@var{address}, size_t @var{length}, int @var{protect}, int @var{flags}, int @var{filedes}, off_t @var{offset})
  1410. @standards{POSIX, sys/mman.h}
  1411. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1412. The @code{mmap} function creates a new mapping, connected to bytes
  1413. (@var{offset}) to (@var{offset} + @var{length} - 1) in the file open on
  1414. @var{filedes}. A new reference for the file specified by @var{filedes}
  1415. is created, which is not removed by closing the file.
  1416. @var{address} gives a preferred starting address for the mapping.
  1417. @code{NULL} expresses no preference. Any previous mapping at that
  1418. address is automatically removed. The address you give may still be
  1419. changed, unless you use the @code{MAP_FIXED} flag.
  1420. @var{protect} contains flags that control what kind of access is
  1421. permitted. They include @code{PROT_READ}, @code{PROT_WRITE}, and
  1422. @code{PROT_EXEC}. The special flag @code{PROT_NONE} reserves a region
  1423. of address space for future use. The @code{mprotect} function can be
  1424. used to change the protection flags. @xref{Memory Protection}.
  1425. The @var{flags} parameter contains flags that control the nature of
  1426. the map. One of @code{MAP_SHARED}, @code{MAP_SHARED_VALIDATE}, or
  1427. @code{MAP_PRIVATE} must be specified. Additional flags may be bitwise
  1428. OR'd to further define the mapping.
  1429. Note that, aside from @code{MAP_PRIVATE} and @code{MAP_SHARED}, not
  1430. all flags are supported on all versions of all operating systems.
  1431. Consult the kernel-specific documentation for details. The flags
  1432. include:
  1433. @vtable @code
  1434. @item MAP_PRIVATE
  1435. This specifies that writes to the region should never be written back
  1436. to the attached file. Instead, a copy is made for the process, and the
  1437. region will be swapped normally if memory runs low. No other process will
  1438. see the changes.
  1439. Since private mappings effectively revert to ordinary memory
  1440. when written to, you must have enough virtual memory for a copy of
  1441. the entire mmapped region if you use this mode with @code{PROT_WRITE}.
  1442. @item MAP_SHARED
  1443. This specifies that writes to the region will be written back to the
  1444. file. Changes made will be shared immediately with other processes
  1445. mmapping the same file.
  1446. Note that actual writing may take place at any time. You need to use
  1447. @code{msync}, described below, if it is important that other processes
  1448. using conventional I/O get a consistent view of the file.
  1449. @item MAP_SHARED_VALIDATE
  1450. Similar to @code{MAP_SHARED} except that additional flags will be
  1451. validated by the kernel, and the call will fail if an unrecognized
  1452. flag is provided. With @code{MAP_SHARED} using a flag on a kernel
  1453. that doesn't support it causes the flag to be ignored.
  1454. @code{MAP_SHARED_VALIDATE} should be used when the behavior of all
  1455. flags is required.
  1456. @item MAP_FIXED
  1457. This forces the system to use the exact mapping address specified in
  1458. @var{address} and fail if it can't. Note that if the new mapping
  1459. would overlap an existing mapping, the overlapping portion of the
  1460. existing map is unmapped.
  1461. @c One of these is official - the other is obviously an obsolete synonym
  1462. @c Which is which?
  1463. @item MAP_ANONYMOUS
  1464. @itemx MAP_ANON
  1465. @standards{POSIX.1-2024, sys/mman.h}
  1466. @standardsx{MAP_ANON, POSIX.1-2024, sys/mman.h}
  1467. This flag tells the system to create an anonymous mapping, not connected
  1468. to a file. @var{filedes} and @var{offset} are ignored, and the region is
  1469. initialized with zeros.
  1470. Anonymous maps are used as the basic primitive to extend the heap on some
  1471. systems. They are also useful to share data between multiple tasks
  1472. without creating a file.
  1473. On some systems using private anonymous mmaps is more efficient than using
  1474. @code{malloc} for large blocks. This is not an issue with @theglibc{},
  1475. as the included @code{malloc} automatically uses @code{mmap} where appropriate.
  1476. @item MAP_HUGETLB
  1477. @standards{Linux, sys/mman.h}
  1478. This requests that the system uses an alternative page size which is
  1479. larger than the default page size for the mapping. For some workloads,
  1480. increasing the page size for large mappings improves performance because
  1481. the system needs to handle far fewer pages. For other workloads which
  1482. require frequent transfer of pages between storage or different nodes,
  1483. the decreased page granularity may cause performance problems due to the
  1484. increased page size and larger transfers.
  1485. In order to create the mapping, the system needs physically contiguous
  1486. memory of the size of the increased page size. As a result,
  1487. @code{MAP_HUGETLB} mappings are affected by memory fragmentation, and
  1488. their creation can fail even if plenty of memory is available in the
  1489. system.
  1490. Not all file systems support mappings with an increased page size.
  1491. The @code{MAP_HUGETLB} flag is specific to Linux.
  1492. @c There is a mechanism to select different hugepage sizes; see
  1493. @c include/uapi/asm-generic/hugetlb_encode.h in the kernel sources.
  1494. @item MAP_32BIT
  1495. Require addresses that can be accessed with a signed 32 bit pointer,
  1496. i.e., within the first 2 GiB. Ignored if @code{MAP_FIXED} is specified.
  1497. @item MAP_DENYWRITE
  1498. @itemx MAP_EXECUTABLE
  1499. @itemx MAP_FILE
  1500. Provided for compatibility. Ignored by the Linux kernel.
  1501. @item MAP_FIXED_NOREPLACE
  1502. Similar to @code{MAP_FIXED} except the call will fail with
  1503. @code{EEXIST} if the new mapping would overwrite an existing mapping.
  1504. To test for support for this flag, specify @code{MAP_FIXED_NOREPLACE} without
  1505. @code{MAP_FIXED}, and (if the call was successful) check the actual address
  1506. returned. If it does not match the address passed, then this flag is
  1507. not supported.
  1508. @item MAP_GROWSDOWN
  1509. This flag is used to make stacks, and is typically only needed inside
  1510. the program loader to set up the main stack for the running process.
  1511. The mapping is created according to the other flags, except an
  1512. additional page just prior to the mapping is marked as a ``guard
  1513. page''. If a write is attempted inside this guard page, that page is
  1514. mapped, the mapping is extended, and a new guard page is created.
  1515. Thus, the mapping continues to grow towards lower addresses until it
  1516. encounters some other mapping.
  1517. Note that accessing memory beyond the guard page will not trigger this
  1518. feature. In gcc, use @code{-fstack-clash-protection} to ensure the
  1519. guard page is always touched.
  1520. @item MAP_LOCKED
  1521. A hint that requests that mapped pages are locked in memory (i.e. not
  1522. paged out). Note that this is a request and not a requirement; use
  1523. @code{mlock} if locking is required.
  1524. @item MAP_POPULATE
  1525. @itemx MAP_NONBLOCK
  1526. @code{MAP_POPULATE} is a hint that requests that the kernel read-ahead
  1527. a file-backed mapping, causing pages to be mapped before they're
  1528. needed. @code{MAP_NONBLOCK} is a hint that requests that the kernel
  1529. @emph{not} attempt such except for pages that are already in memory. Note
  1530. that neither of these hints affects future paging activity; use
  1531. @code{mlock} if such needs to be controlled.
  1532. @item MAP_NORESERVE
  1533. Asks the kernel to not reserve physical backing (i.e. space in a swap
  1534. device) for a mapping. This would be useful for, for example, a very
  1535. large but sparsely used mapping which need not be limited in total
  1536. length by available RAM, but with very few mapped pages. Note that
  1537. writes to such a mapping may cause a @code{SIGSEGV} if the system is
  1538. unable to map a page due to lack of resources.
  1539. On Linux, this flag's behavior may be overridden by
  1540. @file{/proc/sys/vm/overcommit_memory} as documented in the proc(5) man
  1541. page.
  1542. @item MAP_STACK
  1543. Ensures that the resulting mapping is suitable for use as a program
  1544. stack. For example, the use of huge pages might be precluded.
  1545. @item MAP_SYNC
  1546. This is a special flag for DAX devices, which tells the kernel to
  1547. write dirty metadata out whenever dirty data is written out. Unlike
  1548. most other flags, this one will fail unless @code{MAP_SHARED_VALIDATE}
  1549. is also given.
  1550. @item MAP_DROPPABLE
  1551. Request the page to be never written out to swap, it will be zeroed
  1552. under memory pressure (so kernel can just drop the page), it is inherited
  1553. by fork, it is not counted against @code{mlock} budget, and if there is
  1554. not enough memory to service a page fault there is no fatal error (so no
  1555. signal is sent).
  1556. The @code{MAP_DROPPABLE} flag is specific to Linux.
  1557. @end vtable
  1558. @code{mmap} returns the address of the new mapping, or
  1559. @code{MAP_FAILED} for an error.
  1560. Possible errors include:
  1561. @table @code
  1562. @item EACCES
  1563. @var{filedes} was not open for the type of access specified in @var{protect}.
  1564. @item EAGAIN
  1565. The system has temporarily run out of resources.
  1566. @item EBADF
  1567. The @var{filedes} passed is invalid, and a valid file descriptor is
  1568. required (i.e. @code{MAP_ANONYMOUS} was not specified).
  1569. @item EEXIST
  1570. @code{MAP_FIXED_NOREPLACE} was specified and an existing mapping was
  1571. found overlapping the requested address range.
  1572. @item EINVAL
  1573. Either @var{address} was unusable (because it is not a multiple of the
  1574. applicable page size), or inconsistent @var{flags} were given.
  1575. If @code{MAP_HUGETLB} was specified, the file or system does not support
  1576. large page sizes.
  1577. @item ENODEV
  1578. This file is of a type that doesn't support mapping, the process has
  1579. exceeded its data space limit, or the map request would exceed the
  1580. process's virtual address space.
  1581. @item ENOMEM
  1582. There is not enough memory for the operation, the process is out of
  1583. address space, or there are too many mappings. On Linux, the maximum
  1584. number of mappings can be controlled via
  1585. @file{/proc/sys/vm/max_map_count} or, if your OS supports it, via
  1586. the @code{vm.max_map_count} @code{sysctl} setting.
  1587. @item ENOEXEC
  1588. The file is on a filesystem that doesn't support mapping.
  1589. @item EPERM
  1590. @code{PROT_EXEC} was requested but the file is on a filesystem that
  1591. was mounted with execution denied, a file seal prevented the mapping,
  1592. or the caller set @code{MAP_HUGETLB} but does not have the required
  1593. privileges.
  1594. @item EOVERFLOW
  1595. Either the offset into the file plus the length of the mapping causes
  1596. internal page counts to overflow, or the offset requested exceeds the
  1597. length of the file.
  1598. @c On Linux, EAGAIN will appear if the file has a conflicting mandatory lock.
  1599. @c However mandatory locks are not discussed in this manual.
  1600. @c
  1601. @c Similarly, ETXTBSY will occur if the MAP_DENYWRITE flag (not documented
  1602. @c here) is used and the file is already open for writing.
  1603. @end table
  1604. @end deftypefun
  1605. @deftypefun {void *} mmap64 (void *@var{address}, size_t @var{length}, int @var{protect}, int @var{flags}, int @var{filedes}, off64_t @var{offset})
  1606. @standards{LFS, sys/mman.h}
  1607. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1608. @c The page_shift auto detection when MMAP2_PAGE_SHIFT is -1 (it never
  1609. @c is) would be thread-unsafe.
  1610. The @code{mmap64} function is equivalent to the @code{mmap} function but
  1611. the @var{offset} parameter is of type @code{off64_t}. On 32-bit systems
  1612. this allows the file associated with the @var{filedes} descriptor to be
  1613. larger than 2GB. @var{filedes} must be a descriptor returned from a
  1614. call to @code{open64} or @code{fopen64} and @code{freopen64} where the
  1615. descriptor is retrieved with @code{fileno}.
  1616. When the sources are translated with @code{_FILE_OFFSET_BITS == 64} this
  1617. function is actually available under the name @code{mmap}. I.e., the
  1618. new, extended API using 64 bit file sizes and offsets transparently
  1619. replaces the old API.
  1620. @end deftypefun
  1621. @deftypefun int munmap (void *@var{addr}, size_t @var{length})
  1622. @standards{POSIX, sys/mman.h}
  1623. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1624. @code{munmap} removes any memory maps from (@var{addr}) to (@var{addr} +
  1625. @var{length}). @var{length} should be the length of the mapping.
  1626. It is safe to unmap multiple mappings in one command, or include unmapped
  1627. space in the range. It is also possible to unmap only part of an existing
  1628. mapping. However, only entire pages can be removed. If @var{length} is not
  1629. a whole number of pages, it will be rounded up.
  1630. It returns @math{0} for success and @math{-1} for an error.
  1631. One error is possible:
  1632. @table @code
  1633. @item EINVAL
  1634. The memory range given was outside the user mmap range or wasn't page
  1635. aligned.
  1636. @end table
  1637. @end deftypefun
  1638. @deftypefun int msync (void *@var{address}, size_t @var{length}, int @var{flags})
  1639. @standards{POSIX, sys/mman.h}
  1640. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1641. When using shared mappings, the kernel can write the file at any time
  1642. before the mapping is removed. To be certain data has actually been
  1643. written to the file and will be accessible to non-memory-mapped I/O, it
  1644. is necessary to use this function.
  1645. It operates on the region @var{address} to (@var{address} + @var{length}).
  1646. It may be used on part of a mapping or multiple mappings, however the
  1647. region given should not contain any unmapped space.
  1648. @var{flags} can contain some options:
  1649. @vtable @code
  1650. @item MS_SYNC
  1651. This flag makes sure the data is actually written @emph{to disk}.
  1652. Normally @code{msync} only makes sure that accesses to a file with
  1653. conventional I/O reflect the recent changes.
  1654. @item MS_ASYNC
  1655. This tells @code{msync} to begin the synchronization, but not to wait for
  1656. it to complete.
  1657. @c Linux also has MS_INVALIDATE, which I don't understand.
  1658. @end vtable
  1659. @code{msync} returns @math{0} for success and @math{-1} for
  1660. error. Errors include:
  1661. @table @code
  1662. @item EINVAL
  1663. An invalid region was given, or the @var{flags} were invalid.
  1664. @item EFAULT
  1665. There is no existing mapping in at least part of the given region.
  1666. @end table
  1667. @end deftypefun
  1668. @deftypefun {void *} mremap (void *@var{address}, size_t @var{length}, size_t @var{new_length}, int @var{flags}, ... /* void *@var{new_address} */)
  1669. @standards{GNU, sys/mman.h}
  1670. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1671. This function can be used to change the size of an existing memory
  1672. area. @var{address} and @var{length} must cover a region entirely mapped
  1673. in the same @code{mmap} statement. A new mapping with the same
  1674. characteristics will be returned with the length @var{new_length}.
  1675. Possible flags are
  1676. @table @code
  1677. @item MREMAP_MAYMOVE
  1678. If it is given in @var{flags}, the system may remove the existing mapping
  1679. and create a new one of the desired length in another location.
  1680. @item MREMAP_FIXED
  1681. If it is given in @var{flags}, @code{mremap} accepts a fifth argument,
  1682. @code{void *new_address}, which specifies a page-aligned address to
  1683. which the mapping must be moved. Any previous mapping at the address
  1684. range specified by @var{new_address} and @var{new_length} is unmapped.
  1685. @code{MREMAP_FIXED} must be used together with @code{MREMAP_MAYMOVE}.
  1686. @item MREMAP_DONTUNMAP
  1687. If it is given in @var{flags}, @code{mremap} accepts a fifth argument,
  1688. @code{void *new_address}, which specifies a page-aligned address. Any
  1689. previous mapping at the address range specified by @var{new_address} and
  1690. @var{new_length} is unmapped. If @var{new_address} is @code{NULL}, the
  1691. kernel chooses the page-aligned address at which to create the mapping.
  1692. Otherwise, the kernel takes it as a hint about where to place the mapping.
  1693. The mapping at the address range specified by @var{address} and
  1694. @var{length} isn't unmapped.
  1695. @code{MREMAP_DONTUNMAP} must be used together with @code{MREMAP_MAYMOVE}.
  1696. @var{length} must be the same as @var{new_length}. This flag bit is
  1697. Linux-specific.
  1698. @end table
  1699. The address of the resulting mapping is returned, or @code{MAP_FAILED}.
  1700. Possible error codes include:
  1701. @table @code
  1702. @item EFAULT
  1703. There is no existing mapping in at least part of the original region, or
  1704. the region covers two or more distinct mappings.
  1705. @item EINVAL
  1706. Any arguments are inappropriate, including unknown @var{flags} values.
  1707. @item EAGAIN
  1708. The region has pages locked, and if extended it would exceed the
  1709. process's resource limit for locked pages. @xref{Limits on Resources}.
  1710. @item ENOMEM
  1711. The region is private writable, and insufficient virtual memory is
  1712. available to extend it. Also, this error will occur if
  1713. @code{MREMAP_MAYMOVE} is not given and the extension would collide with
  1714. another mapped region.
  1715. @end table
  1716. @end deftypefun
  1717. The @code{mremap} function is only available on a few systems. Except for performing
  1718. optional optimizations one should not rely on this function.
  1719. Not all file descriptors may be mapped. Sockets, pipes, and most devices
  1720. only allow sequential access and do not fit into the mapping abstraction.
  1721. In addition, some regular files may not be mmapable, and older kernels may
  1722. not support mapping at all. Thus, programs using @code{mmap} should
  1723. have a fallback method to use should it fail. @xref{Mmap,,,standards,GNU
  1724. Coding Standards}.
  1725. @deftypefun int madvise (void *@var{addr}, size_t @var{length}, int @var{advice})
  1726. @standards{POSIX, sys/mman.h}
  1727. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  1728. This function can be used to provide the system with @var{advice} about
  1729. the intended usage patterns of the memory region starting at @var{addr}
  1730. and extending @var{length} bytes.
  1731. The valid BSD values for @var{advice} are:
  1732. @vtable @code
  1733. @item MADV_NORMAL
  1734. The region should receive no further special treatment.
  1735. @item MADV_RANDOM
  1736. The region will be accessed via random page references. The kernel
  1737. should page-in the minimal number of pages for each page fault.
  1738. @item MADV_SEQUENTIAL
  1739. The region will be accessed via sequential page references. This
  1740. may cause the kernel to aggressively read-ahead, expecting further
  1741. sequential references after any page fault within this region.
  1742. @item MADV_WILLNEED
  1743. The region will be needed. The pages within this region may
  1744. be pre-faulted in by the kernel.
  1745. @item MADV_DONTNEED
  1746. The region is no longer needed. The kernel may free these pages,
  1747. causing any changes to the pages to be lost, as well as swapped
  1748. out pages to be discarded.
  1749. @item MADV_HUGEPAGE
  1750. @standards{Linux, sys/mman.h}
  1751. Indicate that it is beneficial to increase the page size for this
  1752. mapping. This can improve performance for larger mappings because the
  1753. system needs to handle far fewer pages. However, if parts of the
  1754. mapping are frequently transferred between storage or different nodes,
  1755. performance may suffer because individual transfers can become
  1756. substantially larger due to the increased page size.
  1757. This flag is specific to Linux.
  1758. @item MADV_NOHUGEPAGE
  1759. Undo the effect of a previous @code{MADV_HUGEPAGE} advice. This flag
  1760. is specific to Linux.
  1761. @end vtable
  1762. The POSIX names are slightly different, but with the same meanings:
  1763. @vtable @code
  1764. @item POSIX_MADV_NORMAL
  1765. This corresponds with BSD's @code{MADV_NORMAL}.
  1766. @item POSIX_MADV_RANDOM
  1767. This corresponds with BSD's @code{MADV_RANDOM}.
  1768. @item POSIX_MADV_SEQUENTIAL
  1769. This corresponds with BSD's @code{MADV_SEQUENTIAL}.
  1770. @item POSIX_MADV_WILLNEED
  1771. This corresponds with BSD's @code{MADV_WILLNEED}.
  1772. @item POSIX_MADV_DONTNEED
  1773. This corresponds with BSD's @code{MADV_DONTNEED}.
  1774. @end vtable
  1775. @code{madvise} returns @math{0} for success and @math{-1} for
  1776. error. Errors include:
  1777. @table @code
  1778. @item EINVAL
  1779. An invalid region was given, or the @var{advice} was invalid.
  1780. @item EFAULT
  1781. There is no existing mapping in at least part of the given region.
  1782. @end table
  1783. @end deftypefun
  1784. @deftypefn Function int shm_open (const char *@var{name}, int @var{oflag}, mode_t @var{mode})
  1785. @standards{POSIX, sys/mman.h}
  1786. @safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asuinit{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}}
  1787. @c shm_open @mtslocale @asuinit @ascuheap @asulock @aculock @acsmem @acsfd
  1788. @c libc_once(where_is_shmfs) @mtslocale @asuinit @ascuheap @asulock @aculock @acsmem @acsfd
  1789. @c where_is_shmfs @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd
  1790. @c statfs dup ok
  1791. @c setmntent dup @ascuheap @asulock @acsmem @acsfd @aculock
  1792. @c getmntent_r dup @mtslocale @ascuheap @aculock @acsmem [no @asucorrupt @acucorrupt; exclusive stream]
  1793. @c strcmp dup ok
  1794. @c strlen dup ok
  1795. @c malloc dup @ascuheap @acsmem
  1796. @c mempcpy dup ok
  1797. @c endmntent dup @ascuheap @asulock @aculock @acsmem @acsfd
  1798. @c strlen dup ok
  1799. @c strchr dup ok
  1800. @c mempcpy dup ok
  1801. @c open dup @acsfd
  1802. @c fcntl dup ok
  1803. @c close dup @acsfd
  1804. This function returns a file descriptor that can be used to allocate shared
  1805. memory via @code{mmap}. Unrelated processes can use the same @var{name} to create or
  1806. open existing shared memory objects.
  1807. A @var{name} argument specifies the shared memory object to be opened.
  1808. In @theglibc{} it must be a string smaller than @code{NAME_MAX} bytes starting
  1809. with an optional slash but containing no other slashes.
  1810. The semantics of the @var{oflag} and @var{mode} arguments are the same as in @code{open}.
  1811. @code{shm_open} returns the file descriptor on success or @math{-1} on error.
  1812. On failure @code{errno} is set.
  1813. @end deftypefn
  1814. @deftypefn Function int shm_unlink (const char *@var{name})
  1815. @safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asuinit{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}}
  1816. @c shm_unlink @mtslocale @asuinit @ascuheap @asulock @aculock @acsmem @acsfd
  1817. @c libc_once(where_is_shmfs) dup @mtslocale @asuinit @ascuheap @asulock @aculock @acsmem @acsfd
  1818. @c strlen dup ok
  1819. @c strchr dup ok
  1820. @c mempcpy dup ok
  1821. @c unlink dup ok
  1822. This function is the inverse of @code{shm_open} and removes the object with
  1823. the given @var{name} previously created by @code{shm_open}.
  1824. @code{shm_unlink} returns @math{0} on success or @math{-1} on error.
  1825. On failure @code{errno} is set.
  1826. @end deftypefn
  1827. @deftypefun int memfd_create (const char *@var{name}, unsigned int @var{flags})
  1828. @standards{Linux, sys/mman.h}
  1829. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
  1830. The @code{memfd_create} function returns a file descriptor which can be
  1831. used to create memory mappings using the @code{mmap} function. It is
  1832. similar to the @code{shm_open} function in the sense that these mappings
  1833. are not backed by actual files. However, the descriptor returned by
  1834. @code{memfd_create} does not correspond to a named object; the
  1835. @var{name} argument is used for debugging purposes only (e.g., will
  1836. appear in @file{/proc}), and separate invocations of @code{memfd_create}
  1837. with the same @var{name} will not return descriptors for the same region
  1838. of memory. The descriptor can also be used to create alias mappings
  1839. within the same process.
  1840. The descriptor initially refers to a zero-length file. Before mappings
  1841. can be created which are backed by memory, the file size needs to be
  1842. increased with the @code{ftruncate} function. @xref{File Size}.
  1843. The @var{flags} argument can be a combination of the following flags:
  1844. @vtable @code
  1845. @item MFD_CLOEXEC
  1846. @standards{Linux, sys/mman.h}
  1847. The descriptor is created with the @code{O_CLOEXEC} flag.
  1848. @item MFD_ALLOW_SEALING
  1849. @standards{Linux, sys/mman.h}
  1850. The descriptor supports the addition of seals using the @code{fcntl}
  1851. function.
  1852. @item MFD_HUGETLB
  1853. @standards{Linux, sys/mman.h}
  1854. This requests that mappings created using the returned file descriptor
  1855. use a larger page size. See @code{MAP_HUGETLB} above for details.
  1856. This flag is incompatible with @code{MFD_ALLOW_SEALING}.
  1857. @end vtable
  1858. @code{memfd_create} returns a file descriptor on success, and @math{-1}
  1859. on failure.
  1860. The following @code{errno} error conditions are defined for this
  1861. function:
  1862. @table @code
  1863. @item EINVAL
  1864. An invalid combination is specified in @var{flags}, or @var{name} is
  1865. too long.
  1866. @item EFAULT
  1867. The @var{name} argument does not point to a string.
  1868. @item EMFILE
  1869. The operation would exceed the file descriptor limit for this process.
  1870. @item ENFILE
  1871. The operation would exceed the system-wide file descriptor limit.
  1872. @item ENOMEM
  1873. There is not enough memory for the operation.
  1874. @end table
  1875. @end deftypefun
  1876. @node Waiting for I/O
  1877. @section Waiting for Input or Output
  1878. @cindex waiting for input or output
  1879. @cindex multiplexing input
  1880. @cindex input from multiple files
  1881. Sometimes a program needs to accept input on multiple input channels
  1882. whenever input arrives. For example, some workstations may have devices
  1883. such as a digitizing tablet, function button box, or dial box that are
  1884. connected via normal asynchronous serial interfaces; good user interface
  1885. style requires responding immediately to input on any device. Another
  1886. example is a program that acts as a server to several other processes
  1887. via pipes or sockets.
  1888. You cannot normally use @code{read} for this purpose, because this
  1889. blocks the program until input is available on one particular file
  1890. descriptor; input on other channels won't wake it up. You could set
  1891. nonblocking mode and poll each file descriptor in turn, but this is very
  1892. inefficient.
  1893. A better solution is to use the @code{select} function. This blocks the
  1894. program until input or output is ready on a specified set of file
  1895. descriptors, or until a timer expires, whichever comes first. This
  1896. facility is declared in the header file @file{sys/select.h}.
  1897. @pindex sys/select.h
  1898. In the case of a server socket (@pxref{Listening}), we say that
  1899. ``input'' is available when there are pending connections that could be
  1900. accepted (@pxref{Accepting Connections}). @code{accept} for server
  1901. sockets blocks and interacts with @code{select} just as @code{read} does
  1902. for normal input.
  1903. @cindex file descriptor sets, for @code{select}
  1904. The file descriptor sets for the @code{select} function are specified
  1905. as @code{fd_set} objects. Here is the description of the data type
  1906. and some macros for manipulating these objects.
  1907. @deftp {Data Type} fd_set
  1908. @standards{POSIX.1, sys/select.h}
  1909. The @code{fd_set} data type represents file descriptor sets for the
  1910. @code{select} function. It is actually a bit array.
  1911. @end deftp
  1912. @deftypevr Macro int FD_SETSIZE
  1913. @standards{POSIX.1, sys/select.h}
  1914. The value of this macro is the maximum number of file descriptors that a
  1915. @code{fd_set} object can hold information about. On systems with a
  1916. fixed maximum number, @code{FD_SETSIZE} is at least that number. On
  1917. some systems, including GNU, there is no absolute limit on the number of
  1918. descriptors open, but this macro still has a constant value which
  1919. controls the number of bits in an @code{fd_set}; if you get a file
  1920. descriptor with a value as high as @code{FD_SETSIZE}, you cannot put
  1921. that descriptor into an @code{fd_set}.
  1922. @end deftypevr
  1923. @deftypefn Macro void FD_ZERO (fd_set *@var{set})
  1924. @standards{POSIX.1, sys/select.h}
  1925. @safety{@prelim{}@mtsafe{@mtsrace{:set}}@assafe{}@acsafe{}}
  1926. This macro initializes the file descriptor set @var{set} to be the
  1927. empty set.
  1928. @end deftypefn
  1929. @deftypefn Macro void FD_SET (int @var{filedes}, fd_set *@var{set})
  1930. @standards{POSIX.1, sys/select.h}
  1931. @safety{@prelim{}@mtsafe{@mtsrace{:set}}@assafe{}@acsafe{}}
  1932. @c Setting a bit isn't necessarily atomic, so there's a potential race
  1933. @c here if set is not used exclusively.
  1934. This macro adds @var{filedes} to the file descriptor set @var{set}.
  1935. The @var{filedes} parameter must not have side effects since it is
  1936. evaluated more than once.
  1937. @end deftypefn
  1938. @deftypefn Macro void FD_CLR (int @var{filedes}, fd_set *@var{set})
  1939. @standards{POSIX.1, sys/select.h}
  1940. @safety{@prelim{}@mtsafe{@mtsrace{:set}}@assafe{}@acsafe{}}
  1941. @c Setting a bit isn't necessarily atomic, so there's a potential race
  1942. @c here if set is not used exclusively.
  1943. This macro removes @var{filedes} from the file descriptor set @var{set}.
  1944. The @var{filedes} parameter must not have side effects since it is
  1945. evaluated more than once.
  1946. @end deftypefn
  1947. @deftypefn Macro int FD_ISSET (int @var{filedes}, const fd_set *@var{set})
  1948. @standards{POSIX.1, sys/select.h}
  1949. @safety{@prelim{}@mtsafe{@mtsrace{:set}}@assafe{}@acsafe{}}
  1950. This macro returns a nonzero value (true) if @var{filedes} is a member
  1951. of the file descriptor set @var{set}, and zero (false) otherwise.
  1952. The @var{filedes} parameter must not have side effects since it is
  1953. evaluated more than once.
  1954. @end deftypefn
  1955. Next, here is the description of the @code{select} function itself.
  1956. @deftypefun int select (int @var{nfds}, fd_set *@var{read-fds}, fd_set *@var{write-fds}, fd_set *@var{except-fds}, struct timeval *@var{timeout})
  1957. @standards{POSIX.1, sys/select.h}
  1958. @safety{@prelim{}@mtsafe{@mtsrace{:read-fds} @mtsrace{:write-fds} @mtsrace{:except-fds}}@assafe{}@acsafe{}}
  1959. @c The select syscall is preferred, but pselect6 may be used instead,
  1960. @c which requires converting timeout to a timespec and back. The
  1961. @c conversions are not atomic.
  1962. The @code{select} function blocks the calling process until there is
  1963. activity on any of the specified sets of file descriptors, or until the
  1964. timeout period has expired.
  1965. The file descriptors specified by the @var{read-fds} argument are
  1966. checked to see if they are ready for reading; the @var{write-fds} file
  1967. descriptors are checked to see if they are ready for writing; and the
  1968. @var{except-fds} file descriptors are checked for exceptional
  1969. conditions. You can pass a null pointer for any of these arguments if
  1970. you are not interested in checking for that kind of condition.
  1971. A file descriptor is considered ready for reading if a @code{read}
  1972. call will not block. This usually includes the case where the read offset is at
  1973. the end of the file or there is an error to report. A server socket
  1974. is considered ready for reading if there is a pending connection which
  1975. can be accepted with @code{accept}; @pxref{Accepting Connections}. A
  1976. client socket is ready for writing when its connection is fully
  1977. established; @pxref{Connecting}.
  1978. ``Exceptional conditions'' does not mean errors---errors are reported
  1979. immediately when an erroneous system call is executed, and do not
  1980. constitute a state of the descriptor. Rather, they include conditions
  1981. such as the presence of an urgent message on a socket. (@xref{Sockets},
  1982. for information on urgent messages.)
  1983. The @code{select} function checks only the first @var{nfds} file
  1984. descriptors. The usual thing is to pass @code{FD_SETSIZE} as the value
  1985. of this argument.
  1986. The @var{timeout} specifies the maximum time to wait. If you pass a
  1987. null pointer for this argument, it means to block indefinitely until
  1988. one of the file descriptors is ready. Otherwise, you should provide
  1989. the time in @code{struct timeval} format; see @ref{Time Types}.
  1990. Specify zero as the time (a @code{struct timeval} containing all
  1991. zeros) if you want to find out which descriptors are ready without
  1992. waiting if none are ready.
  1993. The normal return value from @code{select} is the total number of ready file
  1994. descriptors in all of the sets. Each of the argument sets is overwritten
  1995. with information about the descriptors that are ready for the corresponding
  1996. operation. Thus, to see if a particular descriptor @var{desc} has input,
  1997. use @code{FD_ISSET (@var{desc}, @var{read-fds})} after @code{select} returns.
  1998. If @code{select} returns because the timeout period expires, it returns
  1999. a value of zero.
  2000. Any signal will cause @code{select} to return immediately. So if your
  2001. program uses signals, you can't rely on @code{select} to keep waiting
  2002. for the full time specified. If you want to be sure of waiting for a
  2003. particular amount of time, you must check for @code{EINTR} and repeat
  2004. the @code{select} with a newly calculated timeout based on the current
  2005. time. See the example below. See also @ref{Interrupted Primitives}.
  2006. If an error occurs, @code{select} returns @code{-1} and does not modify
  2007. the argument file descriptor sets. The following @code{errno} error
  2008. conditions are defined for this function:
  2009. @table @code
  2010. @item EBADF
  2011. One of the file descriptor sets specified an invalid file descriptor.
  2012. @item EINTR
  2013. The operation was interrupted by a signal. @xref{Interrupted Primitives}.
  2014. @item EINVAL
  2015. The @var{timeout} argument is invalid; one of the components is negative
  2016. or too large.
  2017. @end table
  2018. @end deftypefun
  2019. Here is an example showing how you can use @code{select} to establish a
  2020. timeout period for reading from a file descriptor. The @code{input_timeout}
  2021. function blocks the calling process until input is available on the
  2022. file descriptor, or until the timeout period expires.
  2023. @smallexample
  2024. @include select.c.texi
  2025. @end smallexample
  2026. There is another example showing the use of @code{select} to multiplex
  2027. input from multiple sockets in @ref{Server Example}.
  2028. For an alternate interface to this functionality, see @code{poll}
  2029. (@pxref{Other Low-Level I/O APIs}).
  2030. @node Synchronizing I/O
  2031. @section Synchronizing I/O operations
  2032. @cindex synchronizing
  2033. In most modern operating systems, the normal I/O operations are not
  2034. executed synchronously. I.e., even if a @code{write} system call
  2035. returns, this does not mean the data is actually written to the media,
  2036. e.g., the disk.
  2037. In situations where synchronization points are necessary, you can use
  2038. special functions which ensure that all operations finish before
  2039. they return.
  2040. @deftypefun void sync (void)
  2041. @standards{X/Open, unistd.h}
  2042. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  2043. A call to this function will not return as long as there is data which
  2044. has not been written to the device. All dirty buffers in the kernel will
  2045. be written and so an overall consistent system can be achieved (if no
  2046. other process in parallel writes data).
  2047. A prototype for @code{sync} can be found in @file{unistd.h}.
  2048. @end deftypefun
  2049. Programs more often want to ensure that data written to a given file is
  2050. committed, rather than all data in the system. For this, @code{sync} is overkill.
  2051. @deftypefun int fsync (int @var{fildes})
  2052. @standards{POSIX, unistd.h}
  2053. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  2054. The @code{fsync} function can be used to make sure all data associated with
  2055. the open file @var{fildes} is written to the device associated with the
  2056. descriptor. The function call does not return unless all actions have
  2057. finished.
  2058. A prototype for @code{fsync} can be found in @file{unistd.h}.
  2059. This function is a cancellation point in multi-threaded programs. This
  2060. is a problem if the thread allocates some resources (like memory, file
  2061. descriptors, semaphores or whatever) at the time @code{fsync} is
  2062. called. If the thread gets canceled these resources stay allocated
  2063. until the program ends. To avoid this, calls to @code{fsync} should be
  2064. protected using cancellation handlers.
  2065. @c ref pthread_cleanup_push / pthread_cleanup_pop
  2066. The return value of the function is zero if no error occurred. Otherwise
  2067. it is @math{-1} and the global variable @code{errno} is set to one of the
  2068. following values:
  2069. @table @code
  2070. @item EBADF
  2071. The descriptor @var{fildes} is not valid.
  2072. @item EINVAL
  2073. No synchronization is possible since the system does not implement this.
  2074. @end table
  2075. @end deftypefun
  2076. Sometimes it is not even necessary to write all data associated with a
  2077. file descriptor. E.g., in database files which do not change in size it
  2078. is enough to write all the file content data to the device.
  2079. Meta-information, like the modification time etc., is not that important
  2080. and leaving such information uncommitted does not prevent a successful
  2081. recovery of the file in case of a problem.
  2082. @deftypefun int fdatasync (int @var{fildes})
  2083. @standards{POSIX, unistd.h}
  2084. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  2085. When a call to the @code{fdatasync} function returns, it is ensured
  2086. that all of the file data is written to the device. For all pending I/O
  2087. operations, the parts guaranteeing data integrity have finished.
  2088. Not all systems implement the @code{fdatasync} operation. On systems
  2089. missing this functionality @code{fdatasync} is emulated by a call to
  2090. @code{fsync} since the performed actions are a superset of those
  2091. required by @code{fdatasync}.
  2092. The prototype for @code{fdatasync} is in @file{unistd.h}.
  2093. The return value of the function is zero if no error occurred. Otherwise
  2094. it is @math{-1} and the global variable @code{errno} is set to one of the
  2095. following values:
  2096. @table @code
  2097. @item EBADF
  2098. The descriptor @var{fildes} is not valid.
  2099. @item EINVAL
  2100. No synchronization is possible since the system does not implement this.
  2101. @end table
  2102. @end deftypefun
  2103. @node Asynchronous I/O
  2104. @section Perform I/O Operations in Parallel
  2105. The POSIX.1b standard defines a new set of I/O operations which can
  2106. significantly reduce the time an application spends waiting for I/O. The
  2107. new functions allow a program to initiate one or more I/O operations and
  2108. then immediately resume normal work while the I/O operations are
  2109. executed in parallel. This functionality is available if the
  2110. @file{unistd.h} file defines the symbol @code{_POSIX_ASYNCHRONOUS_IO}.
  2111. These functions are part of the library with realtime functions named
  2112. @file{librt}. They are not actually part of the @file{libc} binary.
  2113. The implementation of these functions can be done using support in the
  2114. kernel (if available) or using an implementation based on threads at
  2115. userlevel. In the latter case it might be necessary to link applications
  2116. with the thread library @file{libpthread} in addition to @file{librt}.
  2117. All AIO operations operate on files which were opened previously. There
  2118. might be arbitrarily many operations running for one file. The
  2119. asynchronous I/O operations are controlled using a data structure named
  2120. @code{struct aiocb} (@dfn{AIO control block}). It is defined in
  2121. @file{aio.h} as follows.
  2122. @deftp {Data Type} {struct aiocb}
  2123. @standards{POSIX.1b, aio.h}
  2124. The POSIX.1b standard mandates that the @code{struct aiocb} structure
  2125. contains at least the members described in the following table. There
  2126. might be more elements which are used by the implementation, but
  2127. depending upon these elements is not portable and is highly deprecated.
  2128. @table @code
  2129. @item int aio_fildes
  2130. This element specifies the file descriptor to be used for the
  2131. operation. It must be a legal descriptor, otherwise the operation will
  2132. fail.
  2133. The device on which the file is opened must allow the seek operation.
  2134. I.e., it is not possible to use any of the AIO operations on devices
  2135. like terminals where an @code{lseek} call would lead to an error.
  2136. @item off_t aio_offset
  2137. This element specifies the offset in the file at which the operation (input
  2138. or output) is performed. Since the operations are carried out in arbitrary
  2139. order and more than one operation for one file descriptor can be
  2140. started, one cannot expect a current read/write position of the file
  2141. descriptor.
  2142. @item volatile void *aio_buf
  2143. This is a pointer to the buffer with the data to be written or the place
  2144. where the read data is stored.
  2145. @item size_t aio_nbytes
  2146. This element specifies the length of the buffer pointed to by @code{aio_buf}.
  2147. @item int aio_reqprio
  2148. If the platform has defined @code{_POSIX_PRIORITIZED_IO} and
  2149. @code{_POSIX_PRIORITY_SCHEDULING}, the AIO requests are
  2150. processed based on the current scheduling priority. The
  2151. @code{aio_reqprio} element can then be used to lower the priority of the
  2152. AIO operation.
  2153. @item struct sigevent aio_sigevent
  2154. This element specifies how the calling process is notified once the
  2155. operation terminates. If the @code{sigev_notify} element is
  2156. @code{SIGEV_NONE}, no notification is sent. If it is @code{SIGEV_SIGNAL},
  2157. the signal determined by @code{sigev_signo} is sent. Otherwise,
  2158. @code{sigev_notify} must be @code{SIGEV_THREAD}. In this case, a thread
  2159. is created which starts executing the function pointed to by
  2160. @code{sigev_notify_function}.
  2161. @item int aio_lio_opcode
  2162. This element is only used by the @code{lio_listio} and
  2163. @code{lio_listio64} functions. Since these functions allow an
  2164. arbitrary number of operations to start at once, and each operation can be
  2165. input or output (or nothing), the information must be stored in the
  2166. control block. The possible values are:
  2167. @vtable @code
  2168. @item LIO_READ
  2169. Start a read operation. Read from the file at position
  2170. @code{aio_offset} and store the next @code{aio_nbytes} bytes in the
  2171. buffer pointed to by @code{aio_buf}.
  2172. @item LIO_WRITE
  2173. Start a write operation. Write @code{aio_nbytes} bytes starting at
  2174. @code{aio_buf} into the file starting at position @code{aio_offset}.
  2175. @item LIO_NOP
  2176. Do nothing for this control block. This value is useful sometimes when
  2177. an array of @code{struct aiocb} values contains holes, i.e., some of the
  2178. values must not be handled although the whole array is presented to the
  2179. @code{lio_listio} function.
  2180. @end vtable
  2181. @end table
  2182. When the sources are compiled using @code{_FILE_OFFSET_BITS == 64} on a
  2183. 32 bit machine, this type is in fact @code{struct aiocb64}, since the LFS
  2184. interface transparently replaces the @code{struct aiocb} definition.
  2185. @end deftp
  2186. For use with the AIO functions defined in the LFS, there is a similar type
  2187. defined which replaces the types of the appropriate members with larger
  2188. types but otherwise is equivalent to @code{struct aiocb}. Particularly,
  2189. all member names are the same.
  2190. @deftp {Data Type} {struct aiocb64}
  2191. @standards{POSIX.1b, aio.h}
  2192. @table @code
  2193. @item int aio_fildes
  2194. This element specifies the file descriptor which is used for the
  2195. operation. It must be a legal descriptor since otherwise the operation
  2196. fails for obvious reasons.
  2197. The device on which the file is opened must allow the seek operation.
  2198. I.e., it is not possible to use any of the AIO operations on devices
  2199. like terminals where an @code{lseek} call would lead to an error.
  2200. @item off64_t aio_offset
  2201. This element specifies at which offset in the file the operation (input
  2202. or output) is performed. Since the operations are carried out in arbitrary
  2203. order and more than one operation for one file descriptor can be
  2204. started, one cannot expect a current read/write position of the file
  2205. descriptor.
  2206. @item volatile void *aio_buf
  2207. This is a pointer to the buffer with the data to be written or the place
  2208. where the read data is stored.
  2209. @item size_t aio_nbytes
  2210. This element specifies the length of the buffer pointed to by @code{aio_buf}.
  2211. @item int aio_reqprio
  2212. If the platform has defined @code{_POSIX_PRIORITIZED_IO} and
  2213. @code{_POSIX_PRIORITY_SCHEDULING}, the AIO requests are
  2214. processed based on the current scheduling priority. The
  2215. @code{aio_reqprio} element can then be used to lower the priority of the
  2216. AIO operation.
  2217. @item struct sigevent aio_sigevent
  2218. This element specifies how the calling process is notified once the
  2219. operation terminates. If the @code{sigev_notify} element is
  2220. @code{SIGEV_NONE}, no notification is sent. If it is @code{SIGEV_SIGNAL},
  2221. the signal determined by @code{sigev_signo} is sent. Otherwise,
  2222. @code{sigev_notify} must be @code{SIGEV_THREAD} in which case a thread
  2223. is created which starts executing the function pointed to by
  2224. @code{sigev_notify_function}.
  2225. @item int aio_lio_opcode
  2226. This element is only used by the @code{lio_listio} and
  2227. @code{lio_listio64} functions. Since these functions allow an
  2228. arbitrary number of operations to start at once, and since each operation can be
  2229. input or output (or nothing), the information must be stored in the
  2230. control block. See the description of @code{struct aiocb} for a description
  2231. of the possible values.
  2232. @end table
  2233. When the sources are compiled using @code{_FILE_OFFSET_BITS == 64} on a
  2234. 32 bit machine, this type is available under the name @code{struct
  2235. aiocb}, since the LFS transparently replaces the old interface.
  2236. @end deftp
  2237. @menu
  2238. * Asynchronous Reads/Writes:: Asynchronous Read and Write Operations.
  2239. * Status of AIO Operations:: Getting the Status of AIO Operations.
  2240. * Synchronizing AIO Operations:: Getting into a consistent state.
  2241. * Cancel AIO Operations:: Cancellation of AIO Operations.
  2242. * Configuration of AIO:: How to optimize the AIO implementation.
  2243. @end menu
  2244. @node Asynchronous Reads/Writes
  2245. @subsection Asynchronous Read and Write Operations
  2246. @deftypefun int aio_read (struct aiocb *@var{aiocbp})
  2247. @standards{POSIX.1b, aio.h}
  2248. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2249. @c Calls aio_enqueue_request.
  2250. @c aio_enqueue_request @asulock @ascuheap @aculock @acsmem
  2251. @c pthread_self ok
  2252. @c pthread_getschedparam @asulock @aculock
  2253. @c lll_lock (pthread descriptor's lock) @asulock @aculock
  2254. @c sched_getparam ok
  2255. @c sched_getscheduler ok
  2256. @c lll_unlock @aculock
  2257. @c pthread_mutex_lock (aio_requests_mutex) @asulock @aculock
  2258. @c get_elem @ascuheap @acsmem [@asucorrupt @acucorrupt]
  2259. @c realloc @ascuheap @acsmem
  2260. @c calloc @ascuheap @acsmem
  2261. @c aio_create_helper_thread @asulock @ascuheap @aculock @acsmem
  2262. @c pthread_attr_init ok
  2263. @c pthread_attr_setdetachstate ok
  2264. @c pthread_get_minstack ok
  2265. @c pthread_attr_setstacksize ok
  2266. @c sigfillset ok
  2267. @c memset ok
  2268. @c sigdelset ok
  2269. @c SYSCALL rt_sigprocmask ok
  2270. @c pthread_create @asulock @ascuheap @aculock @acsmem
  2271. @c lll_lock (default_pthread_attr_lock) @asulock @aculock
  2272. @c alloca/malloc @ascuheap @acsmem
  2273. @c lll_unlock @aculock
  2274. @c allocate_stack @asulock @ascuheap @aculock @acsmem
  2275. @c getpagesize dup
  2276. @c lll_lock (default_pthread_attr_lock) @asulock @aculock
  2277. @c lll_unlock @aculock
  2278. @c _dl_allocate_tls @ascuheap @acsmem
  2279. @c _dl_allocate_tls_storage @ascuheap @acsmem
  2280. @c memalign @ascuheap @acsmem
  2281. @c memset ok
  2282. @c allocate_dtv dup
  2283. @c free @ascuheap @acsmem
  2284. @c allocate_dtv @ascuheap @acsmem
  2285. @c calloc @ascuheap @acsmem
  2286. @c INSTALL_DTV ok
  2287. @c list_add dup
  2288. @c get_cached_stack
  2289. @c lll_lock (stack_cache_lock) @asulock @aculock
  2290. @c list_for_each ok
  2291. @c list_entry dup
  2292. @c FREE_P dup
  2293. @c stack_list_del dup
  2294. @c stack_list_add dup
  2295. @c lll_unlock @aculock
  2296. @c _dl_allocate_tls_init ok
  2297. @c GET_DTV ok
  2298. @c mmap ok
  2299. @c atomic_fetch_add_relaxed ok
  2300. @c munmap ok
  2301. @c change_stack_perm ok
  2302. @c mprotect ok
  2303. @c mprotect ok
  2304. @c stack_list_del dup
  2305. @c _dl_deallocate_tls dup
  2306. @c munmap ok
  2307. @c THREAD_COPY_STACK_GUARD ok
  2308. @c THREAD_COPY_POINTER_GUARD ok
  2309. @c atomic_exchange_acquire ok
  2310. @c lll_futex_wake ok
  2311. @c deallocate_stack @asulock @ascuheap @aculock @acsmem
  2312. @c lll_lock (state_cache_lock) @asulock @aculock
  2313. @c stack_list_del ok
  2314. @c atomic_write_barrier ok
  2315. @c list_del ok
  2316. @c atomic_write_barrier ok
  2317. @c queue_stack @ascuheap @acsmem
  2318. @c stack_list_add ok
  2319. @c atomic_write_barrier ok
  2320. @c list_add ok
  2321. @c atomic_write_barrier ok
  2322. @c free_stacks @ascuheap @acsmem
  2323. @c list_for_each_prev_safe ok
  2324. @c list_entry ok
  2325. @c FREE_P ok
  2326. @c stack_list_del dup
  2327. @c _dl_deallocate_tls dup
  2328. @c munmap ok
  2329. @c _dl_deallocate_tls @ascuheap @acsmem
  2330. @c free @ascuheap @acsmem
  2331. @c lll_unlock @aculock
  2332. @c create_thread @asulock @ascuheap @aculock @acsmem
  2333. @c td_eventword
  2334. @c td_eventmask
  2335. @c do_clone @asulock @ascuheap @aculock @acsmem
  2336. @c PREPARE_CREATE ok
  2337. @c lll_lock (pd->lock) @asulock @aculock
  2338. @c atomic_fetch_add_relaxed ok
  2339. @c clone ok
  2340. @c atomic_fetch_add_relaxed ok
  2341. @c atomic_exchange_acquire ok
  2342. @c lll_futex_wake ok
  2343. @c deallocate_stack dup
  2344. @c sched_setaffinity ok
  2345. @c tgkill ok
  2346. @c sched_setscheduler ok
  2347. @c atomic_compare_and_exchange_bool_acq ok
  2348. @c nptl_create_event ok
  2349. @c lll_unlock (pd->lock) @aculock
  2350. @c free @ascuheap @acsmem
  2351. @c pthread_attr_destroy ok (cpuset won't be set, so free isn't called)
  2352. @c add_request_to_runlist ok
  2353. @c pthread_cond_signal ok
  2354. @c aio_free_request ok
  2355. @c pthread_mutex_unlock @aculock
  2356. @c (in the new thread, initiated with clone)
  2357. @c start_thread ok
  2358. @c HP_TIMING_NOW ok
  2359. @c ctype_init @mtslocale
  2360. @c atomic_exchange_acquire ok
  2361. @c lll_futex_wake ok
  2362. @c sigemptyset ok
  2363. @c sigaddset ok
  2364. @c setjmp ok
  2365. @c LIBC_CANCEL_ASYNC -> __pthread_enable_asynccancel ok
  2366. @c do_cancel ok
  2367. @c pthread_unwind ok
  2368. @c Unwind_ForcedUnwind or longjmp ok [@ascuheap @acsmem?]
  2369. @c lll_lock @asulock @aculock
  2370. @c lll_unlock @asulock @aculock
  2371. @c LIBC_CANCEL_RESET -> __pthread_disable_asynccancel ok
  2372. @c lll_futex_wait ok
  2373. @c ->start_routine ok -----
  2374. @c call_tls_dtors @asulock @ascuheap @aculock @acsmem
  2375. @c user-supplied dtor
  2376. @c rtld_lock_lock_recursive (dl_load_lock) @asulock @aculock
  2377. @c rtld_lock_unlock_recursive @aculock
  2378. @c free @ascuheap @acsmem
  2379. @c nptl_deallocate_tsd @ascuheap @acsmem
  2380. @c tsd user-supplied dtors ok
  2381. @c free @ascuheap @acsmem
  2382. @c libc_thread_freeres
  2383. @c libc_thread_subfreeres ok
  2384. @c atomic_fetch_add_relaxed ok
  2385. @c td_eventword ok
  2386. @c td_eventmask ok
  2387. @c atomic_compare_exchange_bool_acq ok
  2388. @c nptl_death_event ok
  2389. @c lll_robust_dead ok
  2390. @c getpagesize ok
  2391. @c madvise ok
  2392. @c free_tcb @asulock @ascuheap @aculock @acsmem
  2393. @c free @ascuheap @acsmem
  2394. @c deallocate_stack @asulock @ascuheap @aculock @acsmem
  2395. @c lll_futex_wait ok
  2396. @c exit_thread_inline ok
  2397. @c syscall(exit) ok
  2398. This function initiates an asynchronous read operation. It
  2399. immediately returns after the operation was enqueued or when an
  2400. error was encountered.
  2401. The first @code{aiocbp->aio_nbytes} bytes of the file for which
  2402. @code{aiocbp->aio_fildes} is a descriptor are written to the buffer
  2403. starting at @code{aiocbp->aio_buf}. Reading starts at the absolute
  2404. position @code{aiocbp->aio_offset} in the file.
  2405. If prioritized I/O is supported by the platform, the
  2406. @code{aiocbp->aio_reqprio} value is used to adjust the priority before
  2407. the request is actually enqueued.
  2408. The calling process is notified about the termination of the read
  2409. request according to the @code{aiocbp->aio_sigevent} value.
  2410. When @code{aio_read} returns, the return value is zero if no error
  2411. occurred that can be found before the request is enqueued. If such an
  2412. early error is found, the function returns @math{-1} and sets
  2413. @code{errno} to one of the following values:
  2414. @table @code
  2415. @item EAGAIN
  2416. The request was not enqueued due to (temporarily) exceeded resource
  2417. limitations.
  2418. @item ENOSYS
  2419. The @code{aio_read} function is not implemented.
  2420. @item EBADF
  2421. The @code{aiocbp->aio_fildes} descriptor is not valid. This condition
  2422. need not be recognized before enqueueing the request and so this error
  2423. might also be signaled asynchronously.
  2424. @item EINVAL
  2425. The @code{aiocbp->aio_offset} or @code{aiocbp->aio_reqprio} value is
  2426. invalid. This condition need not be recognized before enqueueing the
  2427. request and so this error might also be signaled asynchronously.
  2428. @end table
  2429. If @code{aio_read} returns zero, the current status of the request
  2430. can be queried using @code{aio_error} and @code{aio_return} functions.
  2431. As long as the value returned by @code{aio_error} is @code{EINPROGRESS}
  2432. the operation has not yet completed. If @code{aio_error} returns zero,
  2433. the operation successfully terminated, otherwise the value is to be
  2434. interpreted as an error code. If the function terminated, the result of
  2435. the operation can be obtained using a call to @code{aio_return}. The
  2436. returned value is the same as an equivalent call to @code{read} would
  2437. have returned. Possible error codes returned by @code{aio_error} are:
  2438. @table @code
  2439. @item EBADF
  2440. The @code{aiocbp->aio_fildes} descriptor is not valid.
  2441. @item ECANCELED
  2442. The operation was canceled before the operation was finished
  2443. (@pxref{Cancel AIO Operations}).
  2444. @item EINVAL
  2445. The @code{aiocbp->aio_offset} value is invalid.
  2446. @end table
  2447. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2448. function is in fact @code{aio_read64} since the LFS interface transparently
  2449. replaces the normal implementation.
  2450. @end deftypefun
  2451. @deftypefun int aio_read64 (struct aiocb64 *@var{aiocbp})
  2452. @standards{Unix98, aio.h}
  2453. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2454. This function is similar to the @code{aio_read} function. The only
  2455. difference is that on @w{32 bit} machines, the file descriptor should
  2456. be opened in the large file mode. Internally, @code{aio_read64} uses
  2457. functionality equivalent to @code{lseek64} (@pxref{File Position
  2458. Primitive}) to position the file descriptor correctly for the reading,
  2459. as opposed to the @code{lseek} functionality used in @code{aio_read}.
  2460. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this
  2461. function is available under the name @code{aio_read} and so transparently
  2462. replaces the interface for small files on 32 bit machines.
  2463. @end deftypefun
  2464. To write data asynchronously to a file, there exists an equivalent pair
  2465. of functions with a very similar interface.
  2466. @deftypefun int aio_write (struct aiocb *@var{aiocbp})
  2467. @standards{POSIX.1b, aio.h}
  2468. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2469. This function initiates an asynchronous write operation. The function
  2470. call returns immediately after the operation has been enqueued, or when
  2471. an error is encountered before that happens.
  2472. The first @code{aiocbp->aio_nbytes} bytes from the buffer starting at
  2473. @code{aiocbp->aio_buf} are written to the file for which
  2474. @code{aiocbp->aio_fildes} is a descriptor, starting at the absolute
  2475. position @code{aiocbp->aio_offset} in the file.
  2476. If prioritized I/O is supported by the platform, the
  2477. @code{aiocbp->aio_reqprio} value is used to adjust the priority before
  2478. the request is actually enqueued.
  2479. The calling process is notified about the termination of the write
  2480. request according to the @code{aiocbp->aio_sigevent} value.
  2481. When @code{aio_write} returns, the return value is zero if no error
  2482. occurred that can be found before the request is enqueued. If such an
  2483. early error is found the function returns @math{-1} and sets
  2484. @code{errno} to one of the following values.
  2485. @table @code
  2486. @item EAGAIN
  2487. The request was not enqueued due to (temporarily) exceeded resource
  2488. limitations.
  2489. @item ENOSYS
  2490. The @code{aio_write} function is not implemented.
  2491. @item EBADF
  2492. The @code{aiocbp->aio_fildes} descriptor is not valid. This condition
  2493. may not be recognized before enqueueing the request, and so this error
  2494. might also be signaled asynchronously.
  2495. @item EINVAL
  2496. The @code{aiocbp->aio_offset} or @code{aiocbp->aio_reqprio} value is
  2497. invalid. This condition may not be recognized before enqueueing the
  2498. request and so this error might also be signaled asynchronously.
  2499. @end table
  2500. In the case @code{aio_write} returns zero, the current status of the
  2501. request can be queried using the @code{aio_error} and @code{aio_return}
  2502. functions. As long as the value returned by @code{aio_error} is
  2503. @code{EINPROGRESS} the operation has not yet completed. If
  2504. @code{aio_error} returns zero, the operation successfully terminated,
  2505. otherwise the value is to be interpreted as an error code. If the
  2506. function terminated, the result of the operation can be obtained using a call
  2507. to @code{aio_return}. The returned value is the same as an equivalent
  2508. call to @code{write} would have returned. Possible error codes returned
  2509. by @code{aio_error} are:
  2510. @table @code
  2511. @item EBADF
  2512. The @code{aiocbp->aio_fildes} descriptor is not valid.
  2513. @item ECANCELED
  2514. The operation was canceled before the operation was finished
  2515. (@pxref{Cancel AIO Operations}).
  2516. @item EINVAL
  2517. The @code{aiocbp->aio_offset} value is invalid.
  2518. @end table
  2519. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this
  2520. function is in fact @code{aio_write64} since the LFS interface transparently
  2521. replaces the normal implementation.
  2522. @end deftypefun
  2523. @deftypefun int aio_write64 (struct aiocb64 *@var{aiocbp})
  2524. @standards{Unix98, aio.h}
  2525. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2526. This function is similar to the @code{aio_write} function. The only
  2527. difference is that on @w{32 bit} machines the file descriptor should
  2528. be opened in the large file mode. Internally @code{aio_write64} uses
  2529. functionality equivalent to @code{lseek64} (@pxref{File Position
  2530. Primitive}) to position the file descriptor correctly for the writing,
  2531. as opposed to the @code{lseek} functionality used in @code{aio_write}.
  2532. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this
  2533. function is available under the name @code{aio_write} and so transparently
  2534. replaces the interface for small files on 32 bit machines.
  2535. @end deftypefun
  2536. Besides these functions with the more or less traditional interface,
  2537. POSIX.1b also defines a function which can initiate more than one
  2538. operation at a time, and which can handle freely mixed read and write
  2539. operations. It is therefore similar to a combination of @code{readv} and
  2540. @code{writev}.
  2541. @deftypefun int lio_listio (int @var{mode}, struct aiocb *const @var{list}[], int @var{nent}, struct sigevent *@var{sig})
  2542. @standards{POSIX.1b, aio.h}
  2543. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2544. @c Call lio_listio_internal, that takes the aio_requests_mutex lock and
  2545. @c enqueues each request. Then, it waits for notification or prepares
  2546. @c for it before releasing the lock. Even though it performs memory
  2547. @c allocation and locking of its own, it doesn't add any classes of
  2548. @c safety issues that aren't already covered by aio_enqueue_request.
  2549. The @code{lio_listio} function can be used to enqueue an arbitrary
  2550. number of read and write requests at one time. The requests can all be
  2551. meant for the same file, all for different files or every solution in
  2552. between.
  2553. @code{lio_listio} gets the @var{nent} requests from the array pointed to
  2554. by @var{list}. The operation to be performed is determined by the
  2555. @code{aio_lio_opcode} member in each element of @var{list}. If this
  2556. field is @code{LIO_READ} a read operation is enqueued, similar to a call
  2557. of @code{aio_read} for this element of the array (except that the way
  2558. the termination is signalled is different, as we will see below). If
  2559. the @code{aio_lio_opcode} member is @code{LIO_WRITE} a write operation
  2560. is enqueued. Otherwise the @code{aio_lio_opcode} must be @code{LIO_NOP}
  2561. in which case this element of @var{list} is simply ignored. This
  2562. ``operation'' is useful in situations where one has a fixed array of
  2563. @code{struct aiocb} elements from which only a few need to be handled at
  2564. a time. Another situation is where the @code{lio_listio} call was
  2565. canceled before all requests are processed (@pxref{Cancel AIO
  2566. Operations}) and the remaining requests have to be reissued.
  2567. The other members of each element of the array pointed to by
  2568. @code{list} must have values suitable for the operation as described in
  2569. the documentation for @code{aio_read} and @code{aio_write} above.
  2570. The @var{mode} argument determines how @code{lio_listio} behaves after
  2571. having enqueued all the requests. If @var{mode} is @code{LIO_WAIT} it
  2572. waits until all requests terminated. Otherwise @var{mode} must be
  2573. @code{LIO_NOWAIT} and in this case the function returns immediately after
  2574. having enqueued all the requests. In this case the caller gets a
  2575. notification of the termination of all requests according to the
  2576. @var{sig} parameter. If @var{sig} is @code{NULL} no notification is
  2577. sent. Otherwise a signal is sent or a thread is started, just as
  2578. described in the description for @code{aio_read} or @code{aio_write}.
  2579. If @var{mode} is @code{LIO_WAIT}, the return value of @code{lio_listio}
  2580. is @math{0} when all requests completed successfully. Otherwise the
  2581. function returns @math{-1} and @code{errno} is set accordingly. To find
  2582. out which request or requests failed one has to use the @code{aio_error}
  2583. function on all the elements of the array @var{list}.
  2584. In case @var{mode} is @code{LIO_NOWAIT}, the function returns @math{0} if
  2585. all requests were enqueued correctly. The current state of the requests
  2586. can be found using @code{aio_error} and @code{aio_return} as described
  2587. above. If @code{lio_listio} returns @math{-1} in this mode, the
  2588. global variable @code{errno} is set accordingly. If a request did not
  2589. yet terminate, a call to @code{aio_error} returns @code{EINPROGRESS}. If
  2590. the value is different, the request is finished and the error value (or
  2591. @math{0}) is returned and the result of the operation can be retrieved
  2592. using @code{aio_return}.
  2593. Possible values for @code{errno} are:
  2594. @table @code
  2595. @item EAGAIN
  2596. The resources necessary to queue all the requests are not available at
  2597. the moment. The error status for each element of @var{list} must be
  2598. checked to determine which request failed.
  2599. Another reason could be that the system wide limit of AIO requests is
  2600. exceeded. This cannot be the case for the implementation on @gnusystems{}
  2601. since no arbitrary limits exist.
  2602. @item EINVAL
  2603. The @var{mode} parameter is invalid or @var{nent} is larger than
  2604. @code{AIO_LISTIO_MAX}.
  2605. @item EIO
  2606. One or more of the request's I/O operations failed. The error status of
  2607. each request should be checked to determine which one failed.
  2608. @item ENOSYS
  2609. The @code{lio_listio} function is not supported.
  2610. @end table
  2611. If the @var{mode} parameter is @code{LIO_NOWAIT} and the caller cancels
  2612. a request, the error status for this request returned by
  2613. @code{aio_error} is @code{ECANCELED}.
  2614. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this
  2615. function is in fact @code{lio_listio64} since the LFS interface
  2616. transparently replaces the normal implementation.
  2617. @end deftypefun
  2618. @deftypefun int lio_listio64 (int @var{mode}, struct aiocb64 *const @var{list}[], int @var{nent}, struct sigevent *@var{sig})
  2619. @standards{Unix98, aio.h}
  2620. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2621. This function is similar to the @code{lio_listio} function. The only
  2622. difference is that on @w{32 bit} machines, the file descriptor should
  2623. be opened in the large file mode. Internally, @code{lio_listio64} uses
  2624. functionality equivalent to @code{lseek64} (@pxref{File Position
  2625. Primitive}) to position the file descriptor correctly for the reading or
  2626. writing, as opposed to the @code{lseek} functionality used in
  2627. @code{lio_listio}.
  2628. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this
  2629. function is available under the name @code{lio_listio} and so
  2630. transparently replaces the interface for small files on 32 bit
  2631. machines.
  2632. @end deftypefun
  2633. @node Status of AIO Operations
  2634. @subsection Getting the Status of AIO Operations
  2635. As already described in the documentation of the functions in the last
  2636. section, it must be possible to get information about the status of an I/O
  2637. request. When the operation is performed truly asynchronously (as with
  2638. @code{aio_read} and @code{aio_write} and with @code{lio_listio} when the
  2639. mode is @code{LIO_NOWAIT}), one sometimes needs to know whether a
  2640. specific request already terminated and if so, what the result was.
  2641. The following two functions allow you to get this kind of information.
  2642. @deftypefun int aio_error (const struct aiocb *@var{aiocbp})
  2643. @standards{POSIX.1b, aio.h}
  2644. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  2645. This function determines the error state of the request described by the
  2646. @code{struct aiocb} variable pointed to by @var{aiocbp}. If the
  2647. request has not yet terminated the value returned is always
  2648. @code{EINPROGRESS}. Once the request has terminated the value
  2649. @code{aio_error} returns is either @math{0} if the request completed
  2650. successfully or it returns the value which would be stored in the
  2651. @code{errno} variable if the request would have been done using
  2652. @code{read}, @code{write}, or @code{fsync}.
  2653. The function can return @code{ENOSYS} if it is not implemented. It
  2654. could also return @code{EINVAL} if the @var{aiocbp} parameter does not
  2655. refer to an asynchronous operation whose return status is not yet known.
  2656. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2657. function is in fact @code{aio_error64} since the LFS interface
  2658. transparently replaces the normal implementation.
  2659. @end deftypefun
  2660. @deftypefun int aio_error64 (const struct aiocb64 *@var{aiocbp})
  2661. @standards{Unix98, aio.h}
  2662. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  2663. This function is similar to @code{aio_error} with the only difference
  2664. that the argument is a reference to a variable of type @code{struct
  2665. aiocb64}.
  2666. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2667. function is available under the name @code{aio_error} and so
  2668. transparently replaces the interface for small files on 32 bit
  2669. machines.
  2670. @end deftypefun
  2671. @deftypefun ssize_t aio_return (struct aiocb *@var{aiocbp})
  2672. @standards{POSIX.1b, aio.h}
  2673. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  2674. This function can be used to retrieve the return status of the operation
  2675. carried out by the request described in the variable pointed to by
  2676. @var{aiocbp}. As long as the error status of this request as returned
  2677. by @code{aio_error} is @code{EINPROGRESS} the return value of this function is
  2678. undefined.
  2679. Once the request is finished this function can be used exactly once to
  2680. retrieve the return value. Following calls might lead to undefined
  2681. behavior. The return value itself is the value which would have been
  2682. returned by the @code{read}, @code{write}, or @code{fsync} call.
  2683. The function can return @code{ENOSYS} if it is not implemented. It
  2684. could also return @code{EINVAL} if the @var{aiocbp} parameter does not
  2685. refer to an asynchronous operation whose return status is not yet known.
  2686. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2687. function is in fact @code{aio_return64} since the LFS interface
  2688. transparently replaces the normal implementation.
  2689. @end deftypefun
  2690. @deftypefun ssize_t aio_return64 (struct aiocb64 *@var{aiocbp})
  2691. @standards{Unix98, aio.h}
  2692. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  2693. This function is similar to @code{aio_return} with the only difference
  2694. that the argument is a reference to a variable of type @code{struct
  2695. aiocb64}.
  2696. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2697. function is available under the name @code{aio_return} and so
  2698. transparently replaces the interface for small files on 32 bit
  2699. machines.
  2700. @end deftypefun
  2701. @node Synchronizing AIO Operations
  2702. @subsection Getting into a Consistent State
  2703. When dealing with asynchronous operations it is sometimes necessary to
  2704. get into a consistent state. This would mean for AIO that one wants to
  2705. know whether a certain request or a group of requests were processed.
  2706. This could be done by waiting for the notification sent by the system
  2707. after the operation terminated, but this sometimes would mean wasting
  2708. resources (mainly computation time). Instead POSIX.1b defines two
  2709. functions which will help with most kinds of consistency.
  2710. The @code{aio_fsync} and @code{aio_fsync64} functions are only available
  2711. if the symbol @code{_POSIX_SYNCHRONIZED_IO} is defined in @file{unistd.h}.
  2712. @cindex synchronizing
  2713. @deftypefun int aio_fsync (int @var{op}, struct aiocb *@var{aiocbp})
  2714. @standards{POSIX.1b, aio.h}
  2715. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2716. @c After fcntl to check that the FD is open, it calls
  2717. @c aio_enqueue_request.
  2718. Calling this function forces all I/O operations queued at the
  2719. time of the function call operating on the file descriptor
  2720. @code{aiocbp->aio_fildes} into the synchronized I/O completion state
  2721. (@pxref{Synchronizing I/O}). The @code{aio_fsync} function returns
  2722. immediately but the notification through the method described in
  2723. @code{aiocbp->aio_sigevent} will happen only after all requests for this
  2724. file descriptor have terminated and the file is synchronized. This also
  2725. means that requests for this very same file descriptor which are queued
  2726. after the synchronization request are not affected.
  2727. If @var{op} is @code{O_DSYNC} the synchronization happens as with a call
  2728. to @code{fdatasync}. Otherwise @var{op} should be @code{O_SYNC} and
  2729. the synchronization happens as with @code{fsync}.
  2730. As long as the synchronization has not happened, a call to
  2731. @code{aio_error} with the reference to the object pointed to by
  2732. @var{aiocbp} returns @code{EINPROGRESS}. Once the synchronization is
  2733. done, @code{aio_error} returns @math{0} if the synchronization was
  2734. successful. Otherwise the value returned is the value to which the
  2735. @code{fsync} or @code{fdatasync} function would have set the
  2736. @code{errno} variable. In this case nothing can be assumed about the
  2737. consistency of the data written to this file descriptor.
  2738. The return value of this function is @math{0} if the request was
  2739. successfully enqueued. Otherwise the return value is @math{-1} and
  2740. @code{errno} is set to one of the following values:
  2741. @table @code
  2742. @item EAGAIN
  2743. The request could not be enqueued due to temporary lack of resources.
  2744. @item EBADF
  2745. The file descriptor @code{@var{aiocbp}->aio_fildes} is not valid.
  2746. @item EINVAL
  2747. The implementation does not support I/O synchronization or the @var{op}
  2748. parameter is other than @code{O_DSYNC} and @code{O_SYNC}.
  2749. @item ENOSYS
  2750. This function is not implemented.
  2751. @end table
  2752. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2753. function is in fact @code{aio_fsync64} since the LFS interface
  2754. transparently replaces the normal implementation.
  2755. @end deftypefun
  2756. @deftypefun int aio_fsync64 (int @var{op}, struct aiocb64 *@var{aiocbp})
  2757. @standards{Unix98, aio.h}
  2758. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2759. This function is similar to @code{aio_fsync} with the only difference
  2760. that the argument is a reference to a variable of type @code{struct
  2761. aiocb64}.
  2762. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2763. function is available under the name @code{aio_fsync} and so
  2764. transparently replaces the interface for small files on 32 bit
  2765. machines.
  2766. @end deftypefun
  2767. Another method of synchronization is to wait until one or more requests of a
  2768. specific set terminated. This could be achieved by the @code{aio_*}
  2769. functions to notify the initiating process about the termination but in
  2770. some situations this is not the ideal solution. In a program which
  2771. constantly updates clients somehow connected to the server it is not
  2772. always the best solution to go round robin since some connections might
  2773. be slow. On the other hand letting the @code{aio_*} functions notify the
  2774. caller might also be not the best solution since whenever the process
  2775. works on preparing data for a client it makes no sense to be
  2776. interrupted by a notification since the new client will not be handled
  2777. before the current client is served. For situations like this
  2778. @code{aio_suspend} should be used.
  2779. @deftypefun int aio_suspend (const struct aiocb *const @var{list}[], int @var{nent}, const struct timespec *@var{timeout})
  2780. @standards{POSIX.1b, aio.h}
  2781. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}}
  2782. @c Take aio_requests_mutex, set up waitlist and requestlist, wait
  2783. @c for completion or timeout, and release the mutex.
  2784. When calling this function, the calling thread is suspended until at
  2785. least one of the requests pointed to by the @var{nent} elements of the
  2786. array @var{list} has completed. If any of the requests has already
  2787. completed at the time @code{aio_suspend} is called, the function returns
  2788. immediately. Whether a request has terminated or not is determined by
  2789. comparing the error status of the request with @code{EINPROGRESS}. If
  2790. an element of @var{list} is @code{NULL}, the entry is simply ignored.
  2791. If no request has finished, the calling process is suspended. If
  2792. @var{timeout} is @code{NULL}, the process is not woken until a request
  2793. has finished. If @var{timeout} is not @code{NULL}, the process remains
  2794. suspended at most as long as specified in @var{timeout}. If no request
  2795. has finished by then, @code{aio_suspend} returns with an error.
  2796. The return value of the function is @math{0} if one or more requests
  2797. from the @var{list} have terminated. Otherwise the function returns
  2798. @math{-1} and @code{errno} is set to one of the following values:
  2799. @table @code
  2800. @item EAGAIN
  2801. None of the requests from the @var{list} completed in the time specified
  2802. by @var{timeout}.
  2803. @item EINTR
  2804. A signal interrupted the @code{aio_suspend} function. This signal might
  2805. also be sent by the AIO implementation while signalling the termination
  2806. of one of the requests.
  2807. @item ENOSYS
  2808. The @code{aio_suspend} function is not implemented.
  2809. @end table
  2810. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2811. function is in fact @code{aio_suspend64} since the LFS interface
  2812. transparently replaces the normal implementation.
  2813. @end deftypefun
  2814. @deftypefun int aio_suspend64 (const struct aiocb64 *const @var{list}[], int @var{nent}, const struct timespec *@var{timeout})
  2815. @standards{Unix98, aio.h}
  2816. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}}
  2817. This function is similar to @code{aio_suspend} with the only difference
  2818. that the elements of @var{list} point to variables of type @code{struct
  2819. aiocb64}.
  2820. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this
  2821. function is available under the name @code{aio_suspend} and so
  2822. transparently replaces the interface for small files on 32 bit
  2823. machines.
  2824. @end deftypefun
  2825. @node Cancel AIO Operations
  2826. @subsection Cancellation of AIO Operations
  2827. When one or more requests are asynchronously processed, it might be
  2828. useful in some situations to cancel a selected operation, e.g., if it
  2829. becomes obvious that the written data is no longer accurate and would
  2830. have to be overwritten soon. As an example, assume an application, which
  2831. writes data in files in a situation where new incoming data would have
  2832. to be written in a file which will be updated by an enqueued request.
  2833. The POSIX AIO implementation provides such a function, but this function
  2834. is not capable of forcing the cancellation of the request. It is up to the
  2835. implementation to decide whether it is possible to cancel the operation
  2836. or not. Therefore using this function is merely a hint.
  2837. @deftypefun int aio_cancel (int @var{fildes}, struct aiocb *@var{aiocbp})
  2838. @standards{POSIX.1b, aio.h}
  2839. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2840. @c After fcntl to check the fd is open, hold aio_requests_mutex, call
  2841. @c aio_find_req_fd, aio_remove_request, then aio_notify and
  2842. @c aio_free_request each request before releasing the lock.
  2843. @c aio_notify calls aio_notify_only and free, besides cond signal or
  2844. @c similar. aio_notify_only calls pthread_attr_init,
  2845. @c pthread_attr_setdetachstate, malloc, pthread_create,
  2846. @c notify_func_wrapper, aio_sigqueue, getpid, raise.
  2847. @c notify_func_wrapper calls aio_start_notify_thread, free and then the
  2848. @c notifier function.
  2849. The @code{aio_cancel} function can be used to cancel one or more
  2850. outstanding requests. If the @var{aiocbp} parameter is @code{NULL}, the
  2851. function tries to cancel all of the outstanding requests which would process
  2852. the file descriptor @var{fildes} (i.e., whose @code{aio_fildes} member
  2853. is @var{fildes}). If @var{aiocbp} is not @code{NULL}, @code{aio_cancel}
  2854. attempts to cancel the specific request pointed to by @var{aiocbp}.
  2855. For requests which were successfully canceled, the normal notification
  2856. about the termination of the request should take place. I.e., depending
  2857. on the @code{struct sigevent} object which controls this, nothing
  2858. happens, a signal is sent or a thread is started. If the request cannot
  2859. be canceled, it terminates the usual way after performing the operation.
  2860. After a request is successfully canceled, a call to @code{aio_error} with
  2861. a reference to this request as the parameter will return
  2862. @code{ECANCELED} and a call to @code{aio_return} will return @math{-1}.
  2863. If the request wasn't canceled and is still running the error status is
  2864. still @code{EINPROGRESS}.
  2865. The return value of the function is @code{AIO_CANCELED} if there were
  2866. requests which haven't terminated and which were successfully canceled.
  2867. If there are one or more requests left which couldn't be canceled, the
  2868. return value is @code{AIO_NOTCANCELED}. In this case @code{aio_error}
  2869. must be used to find out which of the, perhaps multiple, requests (if
  2870. @var{aiocbp} is @code{NULL}) weren't successfully canceled. If all
  2871. requests already terminated at the time @code{aio_cancel} is called the
  2872. return value is @code{AIO_ALLDONE}.
  2873. If an error occurred during the execution of @code{aio_cancel} the
  2874. function returns @math{-1} and sets @code{errno} to one of the following
  2875. values.
  2876. @table @code
  2877. @item EBADF
  2878. The file descriptor @var{fildes} is not valid.
  2879. @item ENOSYS
  2880. @code{aio_cancel} is not implemented.
  2881. @end table
  2882. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this
  2883. function is in fact @code{aio_cancel64} since the LFS interface
  2884. transparently replaces the normal implementation.
  2885. @end deftypefun
  2886. @deftypefun int aio_cancel64 (int @var{fildes}, struct aiocb64 *@var{aiocbp})
  2887. @standards{Unix98, aio.h}
  2888. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}}
  2889. This function is similar to @code{aio_cancel} with the only difference
  2890. that the argument is a reference to a variable of type @code{struct
  2891. aiocb64}.
  2892. When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this
  2893. function is available under the name @code{aio_cancel} and so
  2894. transparently replaces the interface for small files on 32 bit
  2895. machines.
  2896. @end deftypefun
  2897. @node Configuration of AIO
  2898. @subsection How to optimize the AIO implementation
  2899. The POSIX standard does not specify how the AIO functions are
  2900. implemented. They could be system calls, but it is also possible to
  2901. emulate them at userlevel.
  2902. At the time of writing, the available implementation is a user-level
  2903. implementation which uses threads for handling the enqueued requests.
  2904. While this implementation requires making some decisions about
  2905. limitations, hard limitations are something best avoided
  2906. in @theglibc{}. Therefore, @theglibc{} provides a means
  2907. for tuning the AIO implementation according to the individual use.
  2908. @deftp {Data Type} {struct aioinit}
  2909. @standards{GNU, aio.h}
  2910. This data type is used to pass the configuration or tunable parameters
  2911. to the implementation. The program has to initialize the members of
  2912. this struct and pass it to the implementation using the @code{aio_init}
  2913. function.
  2914. @table @code
  2915. @item int aio_threads
  2916. This member specifies the maximal number of threads which may be used
  2917. at any one time.
  2918. @item int aio_num
  2919. This number provides an estimate on the maximal number of simultaneously
  2920. enqueued requests.
  2921. @item int aio_locks
  2922. Unused.
  2923. @item int aio_usedba
  2924. Unused.
  2925. @item int aio_debug
  2926. Unused.
  2927. @item int aio_numusers
  2928. Unused.
  2929. @item int aio_reserved[2]
  2930. Unused.
  2931. @end table
  2932. @end deftp
  2933. @deftypefun void aio_init (const struct aioinit *@var{init})
  2934. @standards{GNU, aio.h}
  2935. @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}}
  2936. @c All changes to global objects are guarded by aio_requests_mutex.
  2937. This function, if used, must be called before any other AIO function.
  2938. Calling it is completely voluntary, as it is only meant to help the AIO
  2939. implementation perform better.
  2940. Before calling @code{aio_init}, the members of a variable of
  2941. type @code{struct aioinit} must be initialized. Then a reference to
  2942. this variable is passed as the parameter to @code{aio_init} which itself
  2943. may or may not pay attention to the hints.
  2944. The function has no return value and no error cases are defined. It is
  2945. an extension which follows a proposal from the SGI implementation in
  2946. @w{Irix 6}. It is not covered by POSIX.1b or Unix98.
  2947. @end deftypefun
  2948. @node Control Operations
  2949. @section Control Operations on Files
  2950. @cindex control operations on files
  2951. @cindex @code{fcntl} function
  2952. This section describes how you can perform various other operations on
  2953. file descriptors, such as inquiring about or setting flags describing
  2954. the status of the file descriptor, manipulating record locks, and the
  2955. like. All of these operations are performed by the function @code{fcntl}.
  2956. The second argument to the @code{fcntl} function is a command that
  2957. specifies which operation to perform. The function and macros that name
  2958. various flags that are used with it are declared in the header file
  2959. @file{fcntl.h}. Many of these flags are also used by the @code{open}
  2960. function; see @ref{Opening and Closing Files}.
  2961. @pindex fcntl.h
  2962. @deftypefun int fcntl (int @var{filedes}, int @var{command}, @dots{})
  2963. @standards{POSIX.1, fcntl.h}
  2964. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  2965. The @code{fcntl} function performs the operation specified by
  2966. @var{command} on the file descriptor @var{filedes}. Some commands
  2967. require additional arguments to be supplied. These additional arguments
  2968. and the return value and error conditions are given in the detailed
  2969. descriptions of the individual commands.
  2970. Briefly, here is a list of what the various commands are. For an
  2971. exhaustive list of kernel-specific options, please see @ref{System
  2972. Calls}.
  2973. @vtable @code
  2974. @item F_DUPFD
  2975. Duplicate the file descriptor (return another file descriptor pointing
  2976. to the same open file). @xref{Duplicating Descriptors}.
  2977. @item F_GETFD
  2978. Get flags associated with the file descriptor. @xref{Descriptor Flags}.
  2979. @item F_SETFD
  2980. Set flags associated with the file descriptor. @xref{Descriptor Flags}.
  2981. @item F_GETFL
  2982. Get flags associated with the open file. @xref{File Status Flags}.
  2983. @item F_SETFL
  2984. Set flags associated with the open file. @xref{File Status Flags}.
  2985. @item F_GETLK
  2986. Test a file lock. @xref{File Locks}.
  2987. @item F_SETLK
  2988. Set or clear a file lock. @xref{File Locks}.
  2989. @item F_SETLKW
  2990. Like @code{F_SETLK}, but wait for completion. @xref{File Locks}.
  2991. @item F_OFD_GETLK
  2992. Test an open file description lock. @xref{Open File Description Locks}.
  2993. Specific to Linux.
  2994. @item F_OFD_SETLK
  2995. Set or clear an open file description lock. @xref{Open File Description Locks}.
  2996. Specific to Linux.
  2997. @item F_OFD_SETLKW
  2998. Like @code{F_OFD_SETLK}, but block until lock is acquired.
  2999. @xref{Open File Description Locks}. Specific to Linux.
  3000. @item F_GETOWN
  3001. Get process or process group ID to receive @code{SIGIO} signals.
  3002. @xref{Interrupt Input}.
  3003. @item F_SETOWN
  3004. Set process or process group ID to receive @code{SIGIO} signals.
  3005. @xref{Interrupt Input}.
  3006. @end vtable
  3007. This function is a cancellation point in multi-threaded programs for the
  3008. commands @code{F_SETLKW} (and the LFS analogous @code{F_SETLKW64}) and
  3009. @code{F_OFD_SETLKW}. This is a problem if the thread allocates some
  3010. resources (like memory, file descriptors, semaphores or whatever) at the time
  3011. @code{fcntl} is called. If the thread gets canceled these resources stay
  3012. allocated until the program ends. To avoid this, calls to @code{fcntl} should
  3013. be protected using cancellation handlers.
  3014. @c ref pthread_cleanup_push / pthread_cleanup_pop
  3015. @end deftypefun
  3016. @node Duplicating Descriptors
  3017. @section Duplicating Descriptors
  3018. @cindex duplicating file descriptors
  3019. @cindex redirecting input and output
  3020. You can @dfn{duplicate} a file descriptor, or allocate another file
  3021. descriptor that refers to the same open file as the original. Duplicate
  3022. descriptors share one file position and one set of file status flags
  3023. (@pxref{File Status Flags}), but each has its own set of file descriptor
  3024. flags (@pxref{Descriptor Flags}).
  3025. The major use of duplicating a file descriptor is to implement
  3026. @dfn{redirection} of input or output: that is, to change the
  3027. file or pipe that a particular file descriptor corresponds to.
  3028. You can perform this operation using the @code{fcntl} function with the
  3029. @code{F_DUPFD} command, but there are also convenient functions
  3030. @code{dup} and @code{dup2} for duplicating descriptors.
  3031. @pindex unistd.h
  3032. @pindex fcntl.h
  3033. The @code{fcntl} function and flags are declared in @file{fcntl.h},
  3034. while prototypes for @code{dup} and @code{dup2} are in the header file
  3035. @file{unistd.h}.
  3036. @deftypefun int dup (int @var{old})
  3037. @standards{POSIX.1, unistd.h}
  3038. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  3039. This function copies descriptor @var{old} to the first available
  3040. descriptor number (the first number not currently open). It is
  3041. equivalent to @code{fcntl (@var{old}, F_DUPFD, 0)}.
  3042. @end deftypefun
  3043. @deftypefun int dup2 (int @var{old}, int @var{new})
  3044. @standards{POSIX.1, unistd.h}
  3045. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  3046. This function copies the descriptor @var{old} to descriptor number
  3047. @var{new}.
  3048. If @var{old} is an invalid descriptor, then @code{dup2} does nothing; it
  3049. does not close @var{new}. Otherwise, the new duplicate of @var{old}
  3050. replaces any previous meaning of descriptor @var{new}, as if @var{new}
  3051. were closed first.
  3052. If @var{old} and @var{new} are different numbers, and @var{old} is a
  3053. valid descriptor number, then @code{dup2} is equivalent to:
  3054. @smallexample
  3055. close (@var{new});
  3056. fcntl (@var{old}, F_DUPFD, @var{new})
  3057. @end smallexample
  3058. However, @code{dup2} does this atomically; there is no instant in the
  3059. middle of calling @code{dup2} at which @var{new} is closed and not yet a
  3060. duplicate of @var{old}.
  3061. @end deftypefun
  3062. @deftypefun int dup3 (int @var{old}, int @var{new}, int @var{flags})
  3063. @standards{POSIX.1-2024, unistd.h}
  3064. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  3065. This function is the same as @code{dup2} but creates the new
  3066. descriptor as if it had been opened with flags @var{flags}. The only
  3067. allowed flag is @code{O_CLOEXEC}.
  3068. This function was originally a Linux extension, but was added in
  3069. POSIX.1-2024.
  3070. @end deftypefun
  3071. @deftypevr Macro int F_DUPFD
  3072. @standards{POSIX.1, fcntl.h}
  3073. This macro is used as the @var{command} argument to @code{fcntl}, to
  3074. copy the file descriptor given as the first argument.
  3075. The form of the call in this case is:
  3076. @smallexample
  3077. fcntl (@var{old}, F_DUPFD, @var{next-filedes})
  3078. @end smallexample
  3079. The @var{next-filedes} argument is of type @code{int} and specifies that
  3080. the file descriptor returned should be the next available one greater
  3081. than or equal to this value.
  3082. The return value from @code{fcntl} with this command is normally the value
  3083. of the new file descriptor. A return value of @math{-1} indicates an
  3084. error. The following @code{errno} error conditions are defined for
  3085. this command:
  3086. @table @code
  3087. @item EBADF
  3088. The @var{old} argument is invalid.
  3089. @item EINVAL
  3090. The @var{next-filedes} argument is invalid.
  3091. @item EMFILE
  3092. There are no more file descriptors available---your program is already
  3093. using the maximum. In BSD and GNU, the maximum is controlled by a
  3094. resource limit that can be changed; @pxref{Limits on Resources}, for
  3095. more information about the @code{RLIMIT_NOFILE} limit.
  3096. @end table
  3097. @code{ENFILE} is not a possible error code for @code{dup2} because
  3098. @code{dup2} does not create a new opening of a file; duplicate
  3099. descriptors do not count toward the limit which @code{ENFILE}
  3100. indicates. @code{EMFILE} is possible because it refers to the limit on
  3101. distinct descriptor numbers in use in one process.
  3102. @end deftypevr
  3103. Here is an example showing how to use @code{dup2} to do redirection.
  3104. Typically, redirection of the standard streams (like @code{stdin}) is
  3105. done by a shell or shell-like program before calling one of the
  3106. @code{exec} functions (@pxref{Executing a File}) to execute a new
  3107. program in a child process. When the new program is executed, it
  3108. creates and initializes the standard streams to point to the
  3109. corresponding file descriptors, before its @code{main} function is
  3110. invoked.
  3111. So, to redirect standard input to a file, the shell could do something
  3112. like:
  3113. @smallexample
  3114. pid = fork ();
  3115. if (pid == 0)
  3116. @{
  3117. char *filename;
  3118. char *program;
  3119. int file;
  3120. @dots{}
  3121. file = TEMP_FAILURE_RETRY (open (filename, O_RDONLY));
  3122. dup2 (file, STDIN_FILENO);
  3123. TEMP_FAILURE_RETRY (close (file));
  3124. execv (program, NULL);
  3125. @}
  3126. @end smallexample
  3127. There is also a more detailed example showing how to implement redirection
  3128. in the context of a pipeline of processes in @ref{Launching Jobs}.
  3129. @node Descriptor Flags
  3130. @section File Descriptor Flags
  3131. @cindex file descriptor flags
  3132. @dfn{File descriptor flags} are miscellaneous attributes of a file
  3133. descriptor. These flags are associated with particular file
  3134. descriptors, so that if you have created duplicate file descriptors
  3135. from a single opening of a file, each descriptor has its own set of flags.
  3136. Currently there is just one file descriptor flag: @code{FD_CLOEXEC},
  3137. which causes the descriptor to be closed if you use any of the
  3138. @code{exec@dots{}} functions (@pxref{Executing a File}).
  3139. The symbols in this section are defined in the header file
  3140. @file{fcntl.h}.
  3141. @pindex fcntl.h
  3142. @deftypevr Macro int F_GETFD
  3143. @standards{POSIX.1, fcntl.h}
  3144. This macro is used as the @var{command} argument to @code{fcntl}, to
  3145. specify that it should return the file descriptor flags associated
  3146. with the @var{filedes} argument.
  3147. The normal return value from @code{fcntl} with this command is a
  3148. nonnegative number which can be interpreted as the bitwise OR of the
  3149. individual flags (except that currently there is only one flag to use).
  3150. In case of an error, @code{fcntl} returns @math{-1}. The following
  3151. @code{errno} error conditions are defined for this command:
  3152. @table @code
  3153. @item EBADF
  3154. The @var{filedes} argument is invalid.
  3155. @end table
  3156. @end deftypevr
  3157. @deftypevr Macro int F_SETFD
  3158. @standards{POSIX.1, fcntl.h}
  3159. This macro is used as the @var{command} argument to @code{fcntl}, to
  3160. specify that it should set the file descriptor flags associated with the
  3161. @var{filedes} argument. This requires a third @code{int} argument to
  3162. specify the new flags, so the form of the call is:
  3163. @smallexample
  3164. fcntl (@var{filedes}, F_SETFD, @var{new-flags})
  3165. @end smallexample
  3166. The normal return value from @code{fcntl} with this command is an
  3167. unspecified value other than @math{-1}, which indicates an error.
  3168. The flags and error conditions are the same as for the @code{F_GETFD}
  3169. command.
  3170. @end deftypevr
  3171. The following macro is defined for use as a file descriptor flag with
  3172. the @code{fcntl} function. The value is an integer constant usable
  3173. as a bit mask value.
  3174. @deftypevr Macro int FD_CLOEXEC
  3175. @standards{POSIX.1, fcntl.h}
  3176. @cindex close-on-exec (file descriptor flag)
  3177. This flag specifies that the file descriptor should be closed when
  3178. an @code{exec} function is invoked; see @ref{Executing a File}. When
  3179. a file descriptor is allocated (as with @code{open} or @code{dup}),
  3180. this bit is initially cleared on the new file descriptor, meaning that
  3181. the descriptor will survive into the new program after @code{exec}.
  3182. @end deftypevr
  3183. If you want to modify the file descriptor flags, you should get the
  3184. current flags with @code{F_GETFD} and modify the value. Don't assume
  3185. that the flags listed here are the only ones that are implemented; your
  3186. program may be run years from now and more flags may exist then. For
  3187. example, here is a function to set or clear the flag @code{FD_CLOEXEC}
  3188. without altering any other flags:
  3189. @smallexample
  3190. /* @r{Set the @code{FD_CLOEXEC} flag of @var{desc} if @var{value} is nonzero,}
  3191. @r{or clear the flag if @var{value} is 0.}
  3192. @r{Return 0 on success, or -1 on error with @code{errno} set.} */
  3193. int
  3194. set_cloexec_flag (int desc, int value)
  3195. @{
  3196. int oldflags = fcntl (desc, F_GETFD, 0);
  3197. /* @r{If reading the flags failed, return error indication now.} */
  3198. if (oldflags < 0)
  3199. return oldflags;
  3200. /* @r{Set just the flag we want to set.} */
  3201. if (value != 0)
  3202. oldflags |= FD_CLOEXEC;
  3203. else
  3204. oldflags &= ~FD_CLOEXEC;
  3205. /* @r{Store modified flag word in the descriptor.} */
  3206. return fcntl (desc, F_SETFD, oldflags);
  3207. @}
  3208. @end smallexample
  3209. @node File Status Flags
  3210. @section File Status Flags
  3211. @cindex file status flags
  3212. @dfn{File status flags} are used to specify attributes of the opening of a
  3213. file. Unlike the file descriptor flags discussed in @ref{Descriptor
  3214. Flags}, the file status flags are shared by duplicated file descriptors
  3215. resulting from a single opening of the file. The file status flags are
  3216. specified with the @var{flags} argument to @code{open};
  3217. @pxref{Opening and Closing Files}.
  3218. File status flags fall into three categories, which are described in the
  3219. following sections.
  3220. @itemize @bullet
  3221. @item
  3222. @ref{Access Modes}, specify what type of access is allowed to the
  3223. file: reading, writing, or both. They are set by @code{open} and are
  3224. returned by @code{fcntl}, but cannot be changed.
  3225. @item
  3226. @ref{Open-time Flags}, control details of what @code{open} will do.
  3227. These flags are not preserved after the @code{open} call.
  3228. @item
  3229. @ref{Operating Modes}, affect how operations such as @code{read} and
  3230. @code{write} are done. They are set by @code{open}, and can be fetched or
  3231. changed with @code{fcntl}.
  3232. @end itemize
  3233. The symbols in this section are defined in the header file
  3234. @file{fcntl.h}.
  3235. @pindex fcntl.h
  3236. @menu
  3237. * Access Modes:: Whether the descriptor can read or write.
  3238. * Open-time Flags:: Details of @code{open}.
  3239. * Operating Modes:: Special modes to control I/O operations.
  3240. * Getting File Status Flags:: Fetching and changing these flags.
  3241. @end menu
  3242. @node Access Modes
  3243. @subsection File Access Modes
  3244. The file access mode allows a file descriptor to be used for reading,
  3245. writing, both, or neither. The access mode is determined when the file
  3246. is opened, and never changes.
  3247. @deftypevr Macro int O_RDONLY
  3248. @standards{POSIX.1, fcntl.h}
  3249. Open the file for read access.
  3250. @end deftypevr
  3251. @deftypevr Macro int O_WRONLY
  3252. @standards{POSIX.1, fcntl.h}
  3253. Open the file for write access.
  3254. @end deftypevr
  3255. @deftypevr Macro int O_RDWR
  3256. @standards{POSIX.1, fcntl.h}
  3257. Open the file for both reading and writing.
  3258. @end deftypevr
  3259. @deftypevr Macro int O_PATH
  3260. @standards{Linux, fcntl.h}
  3261. Obtain a file descriptor for the file, but do not open the file for
  3262. reading or writing. Permission checks for the file itself are skipped
  3263. when the file is opened (but permission to access the directory that
  3264. contains it is still needed), and permissions are checked when the
  3265. descriptor is used later on.
  3266. For example, such descriptors can be used with the @code{fexecve}
  3267. function (@pxref{Executing a File}). Other applications involve the
  3268. @samp{*at} function variants, along with the @code{AT_EMPTY_PATH} flag.
  3269. @xref{Descriptor-Relative Access}.
  3270. This access mode is specific to Linux. On @gnuhurdsystems{}, it is
  3271. possible to use @code{O_EXEC} explicitly, or specify no access modes
  3272. at all (see below).
  3273. @end deftypevr
  3274. The portable file access modes @code{O_RDONLY}, @code{O_WRONLY}, and
  3275. @code{O_RDWR} may not correspond to individual bits. To determine the
  3276. file access mode with @code{fcntl}, you must extract the access mode
  3277. bits from the retrieved file status flags, using the @code{O_ACCMODE}
  3278. mask.
  3279. @deftypevr Macro int O_ACCMODE
  3280. @standards{POSIX.1, fcntl.h}
  3281. This macro is a mask that can be bitwise-ANDed with the file status flag
  3282. value to recover the file access mode, assuming that a standard file
  3283. access mode is in use.
  3284. @end deftypevr
  3285. If a non-standard file access mode is used (such as @code{O_PATH} or
  3286. @code{O_EXEC}), masking with @code{O_ACCMODE} may give incorrect
  3287. results. These non-standard access modes are identified by individual
  3288. bits and have to be checked directly (without masking with
  3289. @code{O_ACCMODE} first).
  3290. On @gnuhurdsystems{} (but not on other systems), @code{O_RDONLY} and
  3291. @code{O_WRONLY} are independent bits that can be bitwise-ORed together,
  3292. and it is valid for either bit to be set or clear. This means that
  3293. @code{O_RDWR} is the same as @code{O_RDONLY|O_WRONLY}. A file access
  3294. mode of zero is permissible; it allows no operations that do input or
  3295. output to the file, but does allow other operations such as
  3296. @code{fchmod}. On @gnuhurdsystems{}, since ``read-only'' or ``write-only''
  3297. is a misnomer, @file{fcntl.h} defines additional names for the file
  3298. access modes.
  3299. @deftypevr Macro int O_READ
  3300. @standards{GNU, fcntl.h (optional)}
  3301. Open the file for reading. Same as @code{O_RDONLY}; only defined on GNU/Hurd.
  3302. @end deftypevr
  3303. @deftypevr Macro int O_WRITE
  3304. @standards{GNU, fcntl.h (optional)}
  3305. Open the file for writing. Same as @code{O_WRONLY}; only defined on GNU/Hurd.
  3306. @end deftypevr
  3307. @deftypevr Macro int O_EXEC
  3308. @standards{GNU, fcntl.h (optional)}
  3309. Open the file for executing. Only defined on GNU/Hurd.
  3310. @end deftypevr
  3311. @node Open-time Flags
  3312. @subsection Open-time Flags
  3313. The open-time flags specify options affecting how @code{open} will behave.
  3314. These options are not preserved once the file is open. The exception to
  3315. this is @code{O_NONBLOCK}, which is also an I/O operating mode and so it
  3316. @emph{is} saved. @xref{Opening and Closing Files}, for how to call
  3317. @code{open}.
  3318. There are two sorts of options specified by open-time flags.
  3319. @itemize @bullet
  3320. @item
  3321. @dfn{File name translation flags} affect how @code{open} looks up the
  3322. file name to locate the file, and whether the file can be created.
  3323. @cindex file name translation flags
  3324. @cindex flags, file name translation
  3325. @item
  3326. @dfn{Open-time action flags} specify extra operations that @code{open} will
  3327. perform on the file once it is open.
  3328. @cindex open-time action flags
  3329. @cindex flags, open-time action
  3330. @end itemize
  3331. Here are the file name translation flags.
  3332. @deftypevr Macro int O_CREAT
  3333. @standards{POSIX.1, fcntl.h}
  3334. If set, the file will be created if it doesn't already exist.
  3335. @c !!! mode arg, umask
  3336. @cindex create on open (file status flag)
  3337. @end deftypevr
  3338. @deftypevr Macro int O_EXCL
  3339. @standards{POSIX.1, fcntl.h}
  3340. If both @code{O_CREAT} and @code{O_EXCL} are set, then @code{open} fails
  3341. if the specified file already exists. This is guaranteed to never
  3342. clobber an existing file.
  3343. The @code{O_EXCL} flag has a special meaning in combination with
  3344. @code{O_TMPFILE}; see below.
  3345. @end deftypevr
  3346. @deftypevr Macro int O_DIRECTORY
  3347. @standards{POSIX.1, fcntl.h}
  3348. If set, the open operation fails if the given name is not the name of
  3349. a directory. The @code{errno} variable is set to @code{ENOTDIR} for
  3350. this error condition.
  3351. @end deftypevr
  3352. @deftypevr Macro int O_NOFOLLOW
  3353. @standards{POSIX.1, fcntl.h}
  3354. If set, the open operation fails if the final component of the file name
  3355. refers to a symbolic link. The @code{errno} variable is set to
  3356. @code{ELOOP} for this error condition.
  3357. @end deftypevr
  3358. @deftypevr Macro int O_TMPFILE
  3359. @standards{GNU, fcntl.h}
  3360. If this flag is specified, functions in the @code{open} family create an
  3361. unnamed temporary file. In this case, the pathname argument to the
  3362. @code{open} family of functions (@pxref{Opening and Closing Files}) is
  3363. interpreted as the directory in which the temporary file is created
  3364. (thus determining the file system which provides the storage for the
  3365. file). The @code{O_TMPFILE} flag must be combined with @code{O_WRONLY}
  3366. or @code{O_RDWR}, and the @var{mode} argument is required.
  3367. The temporary file can later be given a name using @code{linkat},
  3368. turning it into a regular file. This allows the atomic creation of a
  3369. file with the specific file attributes (mode and extended attributes)
  3370. and file contents. If, for security reasons, it is not desirable that a
  3371. name can be given to the file, the @code{O_EXCL} flag can be specified
  3372. along with @code{O_TMPFILE}.
  3373. Not all kernels support this open flag. If this flag is unsupported, an
  3374. attempt to create an unnamed temporary file fails with an error of
  3375. @code{EINVAL}. If the underlying file system does not support the
  3376. @code{O_TMPFILE} flag, an @code{EOPNOTSUPP} error is the result.
  3377. The @code{O_TMPFILE} flag is a GNU extension.
  3378. @end deftypevr
  3379. @deftypevr Macro int O_NONBLOCK
  3380. @standards{POSIX.1, fcntl.h}
  3381. @cindex non-blocking open
  3382. This prevents @code{open} from blocking for a ``long time'' to open the
  3383. file. This is only meaningful for some kinds of files, usually devices
  3384. such as serial ports; when it is not meaningful, it is harmless and
  3385. ignored. Often, opening a port to a modem blocks until the modem reports
  3386. carrier detection; if @code{O_NONBLOCK} is specified, @code{open} will
  3387. return immediately without a carrier.
  3388. Note that the @code{O_NONBLOCK} flag is overloaded as both an I/O operating
  3389. mode and a file name translation flag. This means that specifying
  3390. @code{O_NONBLOCK} in @code{open} also sets nonblocking I/O mode;
  3391. @pxref{Operating Modes}. To open the file without blocking but do normal
  3392. I/O that blocks, you must call @code{open} with @code{O_NONBLOCK} set and
  3393. then call @code{fcntl} to turn the bit off.
  3394. @end deftypevr
  3395. @deftypevr Macro int O_NOCTTY
  3396. @standards{POSIX.1, fcntl.h}
  3397. If the named file is a terminal device, don't make it the controlling
  3398. terminal for the process. @xref{Job Control}, for information about
  3399. what it means to be the controlling terminal.
  3400. On @gnuhurdsystems{} and 4.4 BSD, opening a file never makes it the
  3401. controlling terminal and @code{O_NOCTTY} is zero. However, @gnulinuxsystems{}
  3402. and some other systems use a nonzero value for @code{O_NOCTTY} and set the
  3403. controlling terminal when you open a file that is a terminal device; so
  3404. to be portable, use @code{O_NOCTTY} when it is important to avoid this.
  3405. @cindex controlling terminal, setting
  3406. @end deftypevr
  3407. The following three file name translation flags exist only on
  3408. @gnuhurdsystems{}.
  3409. @deftypevr Macro int O_IGNORE_CTTY
  3410. @standards{GNU, fcntl.h (optional)}
  3411. Do not recognize the named file as the controlling terminal, even if it
  3412. refers to the process's existing controlling terminal device. Operations
  3413. on the new file descriptor will never induce job control signals.
  3414. @xref{Job Control}.
  3415. @end deftypevr
  3416. @deftypevr Macro int O_NOLINK
  3417. @standards{GNU, fcntl.h (optional)}
  3418. If the named file is a symbolic link, open the link itself instead of
  3419. the file it refers to. (@code{fstat} on the new file descriptor will
  3420. return the information returned by @code{lstat} on the link's name.)
  3421. @cindex symbolic link, opening
  3422. @end deftypevr
  3423. @deftypevr Macro int O_NOTRANS
  3424. @standards{GNU, fcntl.h (optional)}
  3425. If the named file is specially translated, do not invoke the translator.
  3426. Open the bare file the translator itself sees.
  3427. @end deftypevr
  3428. The open-time action flags tell @code{open} to do additional operations
  3429. which are not really related to opening the file. The reason to do them
  3430. as part of @code{open} instead of in separate calls is that @code{open}
  3431. can do them @i{atomically}.
  3432. @deftypevr Macro int O_TRUNC
  3433. @standards{POSIX.1, fcntl.h}
  3434. Truncate the file to zero length. This option is only useful for
  3435. regular files, not special files such as directories or FIFOs. POSIX.1
  3436. requires that you open the file for writing to use @code{O_TRUNC}. In
  3437. BSD and GNU you must have permission to write the file to truncate it,
  3438. but you need not open for write access.
  3439. This is the only open-time action flag specified by POSIX.1. There is
  3440. no good reason for truncation to be done by @code{open}, instead of by
  3441. calling @code{ftruncate} afterwards. The @code{O_TRUNC} flag existed in
  3442. Unix before @code{ftruncate} was invented, and is retained for backward
  3443. compatibility.
  3444. @end deftypevr
  3445. The remaining open-time action flags are BSD extensions. They exist only
  3446. on some systems. On other systems, these macros are not defined.
  3447. @deftypevr Macro int O_SHLOCK
  3448. @standards{BSD, fcntl.h (optional)}
  3449. Acquire a shared lock on the file, as with @code{flock}.
  3450. @xref{File Locks}.
  3451. If @code{O_CREAT} is specified, the locking is done atomically when
  3452. creating the file. You are guaranteed that no other process will get
  3453. the lock on the new file first.
  3454. @end deftypevr
  3455. @deftypevr Macro int O_EXLOCK
  3456. @standards{BSD, fcntl.h (optional)}
  3457. Acquire an exclusive lock on the file, as with @code{flock}.
  3458. @xref{File Locks}. This is atomic like @code{O_SHLOCK}.
  3459. @end deftypevr
  3460. @node Operating Modes
  3461. @subsection I/O Operating Modes
  3462. The operating modes affect how input and output operations using a file
  3463. descriptor work. These flags are set by @code{open} and can be fetched
  3464. and changed with @code{fcntl}.
  3465. @deftypevr Macro int O_APPEND
  3466. @standards{POSIX.1, fcntl.h}
  3467. The bit that enables append mode for the file. If set, then all
  3468. @code{write} operations write the data at the end of the file, extending
  3469. it, regardless of the current file position. This is the only reliable
  3470. way to append to a file. In append mode, you are guaranteed that the
  3471. data you write will always go to the current end of the file, regardless
  3472. of other processes writing to the file. Conversely, if you simply set
  3473. the file position to the end of file and write, then another process can
  3474. extend the file after you set the file position but before you write,
  3475. resulting in your data appearing someplace before the real end of file.
  3476. @end deftypevr
  3477. @deftypevr Macro int O_NONBLOCK
  3478. @standards{POSIX.1, fcntl.h}
  3479. The bit that enables nonblocking mode for the file. If this bit is set,
  3480. @code{read} requests on the file can return immediately with a failure
  3481. status if there is no input immediately available, instead of blocking.
  3482. Likewise, @code{write} requests can also return immediately with a
  3483. failure status if the output can't be written immediately.
  3484. Note that the @code{O_NONBLOCK} flag is overloaded as both an I/O
  3485. operating mode and a file name translation flag; @pxref{Open-time Flags}.
  3486. @end deftypevr
  3487. @deftypevr Macro int O_NDELAY
  3488. @standards{BSD, fcntl.h}
  3489. This is an obsolete name for @code{O_NONBLOCK}, provided for
  3490. compatibility with BSD. It is not defined by the POSIX.1 standard.
  3491. @end deftypevr
  3492. The remaining operating modes are BSD and GNU extensions. They exist only
  3493. on some systems. On other systems, these macros are not defined.
  3494. @deftypevr Macro int O_ASYNC
  3495. @standards{BSD, fcntl.h}
  3496. The bit that enables asynchronous input mode. If set, then @code{SIGIO}
  3497. signals will be generated when input is available. @xref{Interrupt Input}.
  3498. Asynchronous input mode is a BSD feature.
  3499. @end deftypevr
  3500. @deftypevr Macro int O_FSYNC
  3501. @standards{BSD, fcntl.h}
  3502. The bit that enables synchronous writing for the file. If set, each
  3503. @code{write} call will make sure the data is reliably stored on disk before
  3504. returning. @c !!! xref fsync
  3505. Synchronous writing is a BSD feature.
  3506. @end deftypevr
  3507. @deftypevr Macro int O_SYNC
  3508. @standards{BSD, fcntl.h}
  3509. This is another name for @code{O_FSYNC}. They have the same value.
  3510. @end deftypevr
  3511. @deftypevr Macro int O_NOATIME
  3512. @standards{GNU, fcntl.h}
  3513. If this bit is set, @code{read} will not update the access time of the
  3514. file. @xref{File Times}. This is used by programs that do backups, so
  3515. that backing a file up does not count as reading it.
  3516. Only the owner of the file or the superuser may use this bit.
  3517. This is a GNU extension.
  3518. @end deftypevr
  3519. @node Getting File Status Flags
  3520. @subsection Getting and Setting File Status Flags
  3521. The @code{fcntl} function can fetch or change file status flags.
  3522. @deftypevr Macro int F_GETFL
  3523. @standards{POSIX.1, fcntl.h}
  3524. This macro is used as the @var{command} argument to @code{fcntl}, to
  3525. read the file status flags for the open file with descriptor
  3526. @var{filedes}.
  3527. The normal return value from @code{fcntl} with this command is a
  3528. nonnegative number which can be interpreted as the bitwise OR of the
  3529. individual flags. Since the file access modes are not single-bit values,
  3530. you can mask off other bits in the returned flags with @code{O_ACCMODE}
  3531. to compare them.
  3532. In case of an error, @code{fcntl} returns @math{-1}. The following
  3533. @code{errno} error conditions are defined for this command:
  3534. @table @code
  3535. @item EBADF
  3536. The @var{filedes} argument is invalid.
  3537. @end table
  3538. @end deftypevr
  3539. @deftypevr Macro int F_SETFL
  3540. @standards{POSIX.1, fcntl.h}
  3541. This macro is used as the @var{command} argument to @code{fcntl}, to set
  3542. the file status flags for the open file corresponding to the
  3543. @var{filedes} argument. This command requires a third @code{int}
  3544. argument to specify the new flags, so the call looks like this:
  3545. @smallexample
  3546. fcntl (@var{filedes}, F_SETFL, @var{new-flags})
  3547. @end smallexample
  3548. You can't change the access mode for the file in this way; that is,
  3549. whether the file descriptor was opened for reading or writing.
  3550. The normal return value from @code{fcntl} with this command is an
  3551. unspecified value other than @math{-1}, which indicates an error. The
  3552. error conditions are the same as for the @code{F_GETFL} command.
  3553. @end deftypevr
  3554. If you want to modify the file status flags, you should get the current
  3555. flags with @code{F_GETFL} and modify the value. Don't assume that the
  3556. flags listed here are the only ones that are implemented; your program
  3557. may be run years from now and more flags may exist then. For example,
  3558. here is a function to set or clear the flag @code{O_NONBLOCK} without
  3559. altering any other flags:
  3560. @smallexample
  3561. @group
  3562. /* @r{Set the @code{O_NONBLOCK} flag of @var{desc} if @var{value} is nonzero,}
  3563. @r{or clear the flag if @var{value} is 0.}
  3564. @r{Return 0 on success, or -1 on error with @code{errno} set.} */
  3565. int
  3566. set_nonblock_flag (int desc, int value)
  3567. @{
  3568. int oldflags = fcntl (desc, F_GETFL, 0);
  3569. /* @r{If reading the flags failed, return error indication now.} */
  3570. if (oldflags == -1)
  3571. return -1;
  3572. /* @r{Set just the flag we want to set.} */
  3573. if (value != 0)
  3574. oldflags |= O_NONBLOCK;
  3575. else
  3576. oldflags &= ~O_NONBLOCK;
  3577. /* @r{Store modified flag word in the descriptor.} */
  3578. return fcntl (desc, F_SETFL, oldflags);
  3579. @}
  3580. @end group
  3581. @end smallexample
  3582. @node File Locks
  3583. @section File Locks
  3584. @cindex file locks
  3585. @cindex record locking
  3586. This section describes record locks that are associated with the process.
  3587. There is also a different type of record lock that is associated with the
  3588. open file description instead of the process. @xref{Open File Description Locks}.
  3589. The remaining @code{fcntl} commands are used to support @dfn{record
  3590. locking}, which permits multiple cooperating programs to prevent each
  3591. other from simultaneously accessing parts of a file in error-prone
  3592. ways.
  3593. @cindex exclusive lock
  3594. @cindex write lock
  3595. An @dfn{exclusive} or @dfn{write} lock gives a process exclusive access
  3596. for writing to the specified part of the file. While a write lock is in
  3597. place, no other process can lock that part of the file.
  3598. @cindex shared lock
  3599. @cindex read lock
  3600. A @dfn{shared} or @dfn{read} lock prohibits any other process from
  3601. requesting a write lock on the specified part of the file. However,
  3602. other processes can request read locks.
  3603. The @code{read} and @code{write} functions do not actually check to see
  3604. whether there are any locks in place. If you want to implement a
  3605. locking protocol for a file shared by multiple processes, your application
  3606. must do explicit @code{fcntl} calls to request and clear locks at the
  3607. appropriate points.
  3608. Locks are associated with processes. A process can only have one kind
  3609. of lock set for each byte of a given file. When any file descriptor for
  3610. that file is closed by the process, all of the locks that process holds
  3611. on that file are released, even if the locks were made using other
  3612. descriptors that remain open. Likewise, locks are released when a
  3613. process exits, and are not inherited by child processes created using
  3614. @code{fork} (@pxref{Creating a Process}).
  3615. When making a lock, use a @code{struct flock} to specify what kind of
  3616. lock and where. This data type and the associated macros for the
  3617. @code{fcntl} function are declared in the header file @file{fcntl.h}.
  3618. @pindex fcntl.h
  3619. @deftp {Data Type} {struct flock}
  3620. @standards{POSIX.1, fcntl.h}
  3621. This structure is used with the @code{fcntl} function to describe a file
  3622. lock. It has these members:
  3623. @table @code
  3624. @item short int l_type
  3625. Specifies the type of the lock; one of @code{F_RDLCK}, @code{F_WRLCK}, or
  3626. @code{F_UNLCK}.
  3627. @item short int l_whence
  3628. This corresponds to the @var{whence} argument to @code{fseek} or
  3629. @code{lseek}, and specifies what the offset is relative to. Its value
  3630. can be one of @code{SEEK_SET}, @code{SEEK_CUR}, or @code{SEEK_END}.
  3631. @item off_t l_start
  3632. This specifies the offset of the start of the region to which the lock
  3633. applies, and is given in bytes relative to the point specified by the
  3634. @code{l_whence} member.
  3635. @item off_t l_len
  3636. This specifies the length of the region to be locked. A value of
  3637. @code{0} is treated specially; it means the region extends to the end of
  3638. the file.
  3639. @item pid_t l_pid
  3640. This field is the process ID (@pxref{Process Creation Concepts}) of the
  3641. process holding the lock. It is filled in by calling @code{fcntl} with
  3642. the @code{F_GETLK} command, but is ignored when making a lock. If the
  3643. conflicting lock is an open file description lock
  3644. (@pxref{Open File Description Locks}), then this field will be set to
  3645. @math{-1}.
  3646. @end table
  3647. @end deftp
  3648. @deftypevr Macro int F_GETLK
  3649. @standards{POSIX.1, fcntl.h}
  3650. This macro is used as the @var{command} argument to @code{fcntl}, to
  3651. specify that it should get information about a lock. This command
  3652. requires a third argument of type @w{@code{struct flock *}} to be passed
  3653. to @code{fcntl}, so that the form of the call is:
  3654. @smallexample
  3655. fcntl (@var{filedes}, F_GETLK, @var{lockp})
  3656. @end smallexample
  3657. If there is a lock already in place that would block the lock described
  3658. by the @var{lockp} argument, information about that lock overwrites
  3659. @code{*@var{lockp}}. Existing locks are not reported if they are
  3660. compatible with making a new lock as specified. Thus, you should
  3661. specify a lock type of @code{F_WRLCK} if you want to find out about both
  3662. read and write locks, or @code{F_RDLCK} if you want to find out about
  3663. write locks only.
  3664. There might be more than one lock affecting the region specified by the
  3665. @var{lockp} argument, but @code{fcntl} only returns information about
  3666. one of them. The @code{l_whence} member of the @var{lockp} structure is
  3667. set to @code{SEEK_SET} and the @code{l_start} and @code{l_len} fields
  3668. set to identify the locked region.
  3669. If no lock applies, the only change to the @var{lockp} structure is to
  3670. update the @code{l_type} to a value of @code{F_UNLCK}.
  3671. The normal return value from @code{fcntl} with this command is an
  3672. unspecified value other than @math{-1}, which is reserved to indicate an
  3673. error. The following @code{errno} error conditions are defined for
  3674. this command:
  3675. @table @code
  3676. @item EBADF
  3677. The @var{filedes} argument is invalid.
  3678. @item EINVAL
  3679. Either the @var{lockp} argument doesn't specify valid lock information,
  3680. or the file associated with @var{filedes} doesn't support locks.
  3681. @end table
  3682. @end deftypevr
  3683. @deftypevr Macro int F_SETLK
  3684. @standards{POSIX.1, fcntl.h}
  3685. This macro is used as the @var{command} argument to @code{fcntl}, to
  3686. specify that it should set or clear a lock. This command requires a
  3687. third argument of type @w{@code{struct flock *}} to be passed to
  3688. @code{fcntl}, so that the form of the call is:
  3689. @smallexample
  3690. fcntl (@var{filedes}, F_SETLK, @var{lockp})
  3691. @end smallexample
  3692. If the process already has a lock on any part of the region, the old lock
  3693. on that part is replaced with the new lock. You can remove a lock
  3694. by specifying a lock type of @code{F_UNLCK}.
  3695. If the lock cannot be set, @code{fcntl} returns immediately with a value
  3696. of @math{-1}. This function does not block while waiting for other processes
  3697. to release locks. If @code{fcntl} succeeds, it returns a value other
  3698. than @math{-1}.
  3699. The following @code{errno} error conditions are defined for this
  3700. function:
  3701. @table @code
  3702. @item EAGAIN
  3703. @itemx EACCES
  3704. The lock cannot be set because it is blocked by an existing lock on the
  3705. file. Some systems use @code{EAGAIN} in this case, and other systems
  3706. use @code{EACCES}; your program should treat them alike, after
  3707. @code{F_SETLK}. (@gnulinuxhurdsystems{} always use @code{EAGAIN}.)
  3708. @item EBADF
  3709. Either: the @var{filedes} argument is invalid; you requested a read lock
  3710. but the @var{filedes} is not open for read access; or, you requested a
  3711. write lock but the @var{filedes} is not open for write access.
  3712. @item EINVAL
  3713. Either the @var{lockp} argument doesn't specify valid lock information,
  3714. or the file associated with @var{filedes} doesn't support locks.
  3715. @item ENOLCK
  3716. The system has run out of file lock resources; there are already too
  3717. many file locks in place.
  3718. Well-designed file systems never report this error, because they have no
  3719. limitation on the number of locks. However, you must still take account
  3720. of the possibility of this error, as it could result from network access
  3721. to a file system on another machine.
  3722. @end table
  3723. @end deftypevr
  3724. @deftypevr Macro int F_SETLKW
  3725. @standards{POSIX.1, fcntl.h}
  3726. This macro is used as the @var{command} argument to @code{fcntl}, to
  3727. specify that it should set or clear a lock. It is just like the
  3728. @code{F_SETLK} command, but causes the process to block (or wait)
  3729. until the request can be satisfied.
  3730. This command requires a third argument of type @code{struct flock *}, as
  3731. for the @code{F_SETLK} command.
  3732. The @code{fcntl} return values and errors are the same as for the
  3733. @code{F_SETLK} command, but these additional @code{errno} error conditions
  3734. are defined for this command:
  3735. @table @code
  3736. @item EINTR
  3737. The function was interrupted by a signal while it was waiting.
  3738. @xref{Interrupted Primitives}.
  3739. @item EDEADLK
  3740. The specified region is being locked by another process. But that
  3741. process is waiting to lock a region which the current process has
  3742. locked, so waiting for the lock would result in deadlock. The system
  3743. does not guarantee that it will detect all such conditions, but it lets
  3744. you know if it notices one.
  3745. @end table
  3746. @end deftypevr
  3747. The following macros are defined for use as values for the @code{l_type}
  3748. member of the @code{flock} structure. The values are integer constants.
  3749. @vtable @code
  3750. @item F_RDLCK
  3751. @standards{POSIX.1, fcntl.h}
  3752. This macro is used to specify a read (or shared) lock.
  3753. @item F_WRLCK
  3754. @standards{POSIX.1, fcntl.h}
  3755. This macro is used to specify a write (or exclusive) lock.
  3756. @item F_UNLCK
  3757. @standards{POSIX.1, fcntl.h}
  3758. This macro is used to specify that the region is unlocked.
  3759. @end vtable
  3760. As an example of a situation where file locking is useful, consider a
  3761. program that can be run simultaneously by several different users, that
  3762. logs status information to a common file. One example of such a program
  3763. might be a game that uses a file to keep track of high scores. Another
  3764. example might be a program that records usage or accounting information
  3765. for billing purposes.
  3766. Having multiple copies of the program simultaneously writing to the
  3767. file could cause the contents of the file to become mixed up. But
  3768. you can prevent this kind of problem by setting a write lock on the
  3769. file before actually writing to the file.
  3770. If the program also needs to read the file and wants to make sure that
  3771. the contents of the file are in a consistent state, then it can also use
  3772. a read lock. While the read lock is set, no other process can lock
  3773. that part of the file for writing.
  3774. @c ??? This section could use an example program.
  3775. Remember that file locks are only an @emph{advisory} protocol for
  3776. controlling access to a file. There is still potential for access to
  3777. the file by programs that don't use the lock protocol.
  3778. @node Open File Description Locks
  3779. @section Open File Description Locks
  3780. In contrast to process-associated record locks (@pxref{File Locks}),
  3781. open file description record locks are associated with an open file
  3782. description rather than a process.
  3783. Using @code{fcntl} to apply an open file description lock on a region that
  3784. already has an existing open file description lock that was created via the
  3785. same file descriptor will never cause a lock conflict.
  3786. Open file description locks are also inherited by child processes across
  3787. @code{fork}, or @code{clone} with @code{CLONE_FILES} set
  3788. (@pxref{Creating a Process}), along with the file descriptor.
  3789. It is important to distinguish between the open file @emph{description} (an
  3790. instance of an open file, usually created by a call to @code{open}) and
  3791. an open file @emph{descriptor}, which is a numeric value that refers to the
  3792. open file description. The locks described here are associated with the
  3793. open file @emph{description} and not the open file @emph{descriptor}.
  3794. Using @code{dup} (@pxref{Duplicating Descriptors}) to copy a file
  3795. descriptor does not give you a new open file description, but rather copies a
  3796. reference to an existing open file description and assigns it to a new
  3797. file descriptor. Thus, open file description locks set on a file
  3798. descriptor cloned by @code{dup} will never conflict with open file
  3799. description locks set on the original descriptor since they refer to the
  3800. same open file description. Depending on the range and type of lock
  3801. involved, the original lock may be modified by a @code{F_OFD_SETLK} or
  3802. @code{F_OFD_SETLKW} command in this situation, however.
  3803. Open file description locks always conflict with process-associated locks,
  3804. even if acquired by the same process or on the same open file
  3805. descriptor.
  3806. Open file description locks use the same @code{struct flock} as
  3807. process-associated locks as an argument (@pxref{File Locks}) and the
  3808. macros for the @code{command} values are also declared in the header file
  3809. @file{fcntl.h}. To use them, the macro @code{_GNU_SOURCE} must be
  3810. defined prior to including any header file.
  3811. In contrast to process-associated locks, any @code{struct flock} used as
  3812. an argument to open file description lock commands must have the @code{l_pid}
  3813. value set to @math{0}. Also, when returning information about an
  3814. open file description lock in a @code{F_GETLK} or @code{F_OFD_GETLK} request,
  3815. the @code{l_pid} field in @code{struct flock} will be set to @math{-1}
  3816. to indicate that the lock is not associated with a process.
  3817. When the same @code{struct flock} is reused as an argument to a
  3818. @code{F_OFD_SETLK} or @code{F_OFD_SETLKW} request after being used for an
  3819. @code{F_OFD_GETLK} request, it is necessary to inspect and reset the
  3820. @code{l_pid} field to @math{0}.
  3821. @pindex fcntl.h
  3822. @deftypevr Macro int F_OFD_GETLK
  3823. This macro is used as the @var{command} argument to @code{fcntl}, to
  3824. specify that it should get information about a lock. This command
  3825. requires a third argument of type @w{@code{struct flock *}} to be passed
  3826. to @code{fcntl}, so that the form of the call is:
  3827. @smallexample
  3828. fcntl (@var{filedes}, F_OFD_GETLK, @var{lockp})
  3829. @end smallexample
  3830. If there is a lock already in place that would block the lock described
  3831. by the @var{lockp} argument, information about that lock is written to
  3832. @code{*@var{lockp}}. Existing locks are not reported if they are
  3833. compatible with making a new lock as specified. Thus, you should
  3834. specify a lock type of @code{F_WRLCK} if you want to find out about both
  3835. read and write locks, or @code{F_RDLCK} if you want to find out about
  3836. write locks only.
  3837. There might be more than one lock affecting the region specified by the
  3838. @var{lockp} argument, but @code{fcntl} only returns information about
  3839. one of them. Which lock is returned in this situation is undefined.
  3840. The @code{l_whence} member of the @var{lockp} structure is set to
  3841. @code{SEEK_SET} and the @code{l_start} and @code{l_len} fields are set
  3842. to identify the locked region.
  3843. If no conflicting lock exists, the only change to the @var{lockp} structure
  3844. is to update the @code{l_type} field to the value @code{F_UNLCK}.
  3845. The normal return value from @code{fcntl} with this command is either @math{0}
  3846. on success or @math{-1}, which indicates an error. The following @code{errno}
  3847. error conditions are defined for this command:
  3848. @table @code
  3849. @item EBADF
  3850. The @var{filedes} argument is invalid.
  3851. @item EINVAL
  3852. Either the @var{lockp} argument doesn't specify valid lock information,
  3853. the operating system kernel doesn't support open file description locks, or the file
  3854. associated with @var{filedes} doesn't support locks.
  3855. @end table
  3856. @end deftypevr
  3857. @deftypevr Macro int F_OFD_SETLK
  3858. @standards{POSIX.1, fcntl.h}
  3859. This macro is used as the @var{command} argument to @code{fcntl}, to
  3860. specify that it should set or clear a lock. This command requires a
  3861. third argument of type @w{@code{struct flock *}} to be passed to
  3862. @code{fcntl}, so that the form of the call is:
  3863. @smallexample
  3864. fcntl (@var{filedes}, F_OFD_SETLK, @var{lockp})
  3865. @end smallexample
  3866. If the open file already has a lock on any part of the
  3867. region, the old lock on that part is replaced with the new lock. You
  3868. can remove a lock by specifying a lock type of @code{F_UNLCK}.
  3869. If the lock cannot be set, @code{fcntl} returns immediately with a value
  3870. of @math{-1}. This command does not wait for other tasks
  3871. to release locks. If @code{fcntl} succeeds, it returns @math{0}.
  3872. The following @code{errno} error conditions are defined for this
  3873. command:
  3874. @table @code
  3875. @item EAGAIN
  3876. The lock cannot be set because it is blocked by an existing lock on the
  3877. file.
  3878. @item EBADF
  3879. Either: the @var{filedes} argument is invalid; you requested a read lock
  3880. but the @var{filedes} is not open for read access; or, you requested a
  3881. write lock but the @var{filedes} is not open for write access.
  3882. @item EINVAL
  3883. Either the @var{lockp} argument doesn't specify valid lock information,
  3884. the operating system kernel doesn't support open file description locks, or the
  3885. file associated with @var{filedes} doesn't support locks.
  3886. @item ENOLCK
  3887. The system has run out of file lock resources; there are already too
  3888. many file locks in place.
  3889. Well-designed file systems never report this error, because they have no
  3890. limitation on the number of locks. However, you must still take account
  3891. of the possibility of this error, as it could result from network access
  3892. to a file system on another machine.
  3893. @end table
  3894. @end deftypevr
  3895. @deftypevr Macro int F_OFD_SETLKW
  3896. @standards{POSIX.1, fcntl.h}
  3897. This macro is used as the @var{command} argument to @code{fcntl}, to
  3898. specify that it should set or clear a lock. It is just like the
  3899. @code{F_OFD_SETLK} command, but causes the process to wait until the request
  3900. can be completed.
  3901. This command requires a third argument of type @code{struct flock *}, as
  3902. for the @code{F_OFD_SETLK} command.
  3903. The @code{fcntl} return values and errors are the same as for the
  3904. @code{F_OFD_SETLK} command, but these additional @code{errno} error conditions
  3905. are defined for this command:
  3906. @table @code
  3907. @item EINTR
  3908. The function was interrupted by a signal while it was waiting.
  3909. @xref{Interrupted Primitives}.
  3910. @end table
  3911. @end deftypevr
  3912. Open file description locks are useful in the same sorts of situations as
  3913. process-associated locks. They can also be used to synchronize file
  3914. access between threads within the same process by having each thread perform
  3915. its own @code{open} of the file, to obtain its own open file description.
  3916. Because open file description locks are automatically freed only upon
  3917. closing the last file descriptor that refers to the open file
  3918. description, this locking mechanism avoids the possibility that locks
  3919. are inadvertently released due to a library routine opening and closing
  3920. a file without the application being aware.
  3921. As with process-associated locks, open file description locks are advisory.
  3922. @node Open File Description Locks Example
  3923. @section Open File Description Locks Example
  3924. Here is an example of using open file description locks in a threaded
  3925. program. If this program used process-associated locks, then it would be
  3926. subject to data corruption because process-associated locks are shared
  3927. by the threads inside a process, and thus cannot be used by one thread
  3928. to lock out another thread in the same process.
  3929. Proper error handling has been omitted in the following program for
  3930. brevity.
  3931. @smallexample
  3932. @include ofdlocks.c.texi
  3933. @end smallexample
  3934. This example creates three threads each of which loops five times,
  3935. appending to the file. Access to the file is serialized via open file
  3936. description locks. If we compile and run the above program, we'll end up
  3937. with @file{/tmp/foo} that has 15 lines in it.
  3938. If we, however, were to replace the @code{F_OFD_SETLK} and
  3939. @code{F_OFD_SETLKW} commands with their process-associated lock
  3940. equivalents, the locking essentially becomes a no-op since it is all done
  3941. within the context of the same process. That leads to data corruption
  3942. (typically manifested as missing lines) as some threads race in and
  3943. overwrite the data written by others.
  3944. @node Interrupt Input
  3945. @section Interrupt-Driven Input
  3946. @cindex interrupt-driven input
  3947. If you set the @code{O_ASYNC} status flag on a file descriptor
  3948. (@pxref{File Status Flags}), a @code{SIGIO} signal is sent whenever
  3949. input or output becomes possible on that file descriptor. The process
  3950. or process group to receive the signal can be selected by using the
  3951. @code{F_SETOWN} command to the @code{fcntl} function. If the file
  3952. descriptor is a socket, this also selects the recipient of @code{SIGURG}
  3953. signals that are delivered when out-of-band data arrives on that socket;
  3954. see @ref{Out-of-Band Data}. (@code{SIGURG} is sent in any situation
  3955. where @code{select} would report the socket as having an ``exceptional
  3956. condition''. @xref{Waiting for I/O}.)
  3957. If the file descriptor corresponds to a terminal device, then @code{SIGIO}
  3958. signals are sent to the foreground process group of the terminal.
  3959. @xref{Job Control}.
  3960. @pindex fcntl.h
  3961. The symbols in this section are defined in the header file
  3962. @file{fcntl.h}.
  3963. @deftypevr Macro int F_GETOWN
  3964. @standards{BSD, fcntl.h}
  3965. This macro is used as the @var{command} argument to @code{fcntl}, to
  3966. specify that it should get information about the process or process
  3967. group to which @code{SIGIO} signals are sent. (For a terminal, this is
  3968. actually the foreground process group ID, which you can get using
  3969. @code{tcgetpgrp}; see @ref{Terminal Access Functions}.)
  3970. The return value is interpreted as a process ID; if negative, its
  3971. absolute value is the process group ID.
  3972. The following @code{errno} error condition is defined for this command:
  3973. @table @code
  3974. @item EBADF
  3975. The @var{filedes} argument is invalid.
  3976. @end table
  3977. @end deftypevr
  3978. @deftypevr Macro int F_SETOWN
  3979. @standards{BSD, fcntl.h}
  3980. This macro is used as the @var{command} argument to @code{fcntl}, to
  3981. specify that it should set the process or process group to which
  3982. @code{SIGIO} signals are sent. This command requires a third argument
  3983. of type @code{pid_t} to be passed to @code{fcntl}, so that the form of
  3984. the call is:
  3985. @smallexample
  3986. fcntl (@var{filedes}, F_SETOWN, @var{pid})
  3987. @end smallexample
  3988. The @var{pid} argument should be a process ID. You can also pass a
  3989. negative number whose absolute value is a process group ID.
  3990. The return value from @code{fcntl} with this command is @math{-1}
  3991. in case of error and some other value if successful. The following
  3992. @code{errno} error conditions are defined for this command:
  3993. @table @code
  3994. @item EBADF
  3995. The @var{filedes} argument is invalid.
  3996. @item ESRCH
  3997. There is no process or process group corresponding to @var{pid}.
  3998. @end table
  3999. @end deftypevr
  4000. @c ??? This section could use an example program.
  4001. @node IOCTLs
  4002. @section Generic I/O Control operations
  4003. @cindex generic i/o control operations
  4004. @cindex IOCTLs
  4005. @gnusystems{} can handle most input/output operations on many different
  4006. devices and objects in terms of a few file primitives---@code{read},
  4007. @code{write} and @code{lseek}. However, most devices also have a few
  4008. peculiar operations which do not fit into this model, such as:
  4009. @itemize @bullet
  4010. @item
  4011. Changing the character font used on a terminal.
  4012. @item
  4013. Telling a magnetic tape system to rewind or fast forward. (Since they
  4014. cannot move in byte increments, @code{lseek} is inapplicable.)
  4015. @item
  4016. Ejecting a disk from a drive.
  4017. @item
  4018. Playing an audio track from a CD-ROM drive.
  4019. @item
  4020. Maintaining routing tables for a network.
  4021. @end itemize
  4022. Although some of these objects such as sockets and terminals
  4023. @footnote{Actually, the terminal-specific functions are implemented with
  4024. IOCTLs on many platforms.} have special functions of their own, it would
  4025. not be practical to create functions for all these cases.
  4026. Instead these minor operations, known as @dfn{IOCTL}s, are assigned code
  4027. numbers and multiplexed through the @code{ioctl} function, defined in
  4028. @file{sys/ioctl.h}. The code numbers themselves are defined in many
  4029. different headers.
  4030. @deftypefun int ioctl (int @var{filedes}, int @var{command}, @dots{})
  4031. @standards{BSD, sys/ioctl.h}
  4032. @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
  4033. The @code{ioctl} function performs the generic I/O operation
  4034. @var{command} on @var{filedes}.
  4035. A third argument is usually present, either a single number or a pointer
  4036. to a structure. The meaning of this argument, the returned value, and
  4037. any error codes depend upon the command used. Often @math{-1} is
  4038. returned for a failure.
  4039. @end deftypefun
  4040. On some systems, IOCTLs used by different devices share the same numbers.
  4041. Thus, although use of an inappropriate IOCTL @emph{usually} only produces
  4042. an error, you should not attempt to use device-specific IOCTLs on an
  4043. unknown device.
  4044. Most IOCTLs are OS-specific and/or only used in special system utilities,
  4045. and are thus beyond the scope of this document. For an example of the use
  4046. of an IOCTL, see @ref{Out-of-Band Data}.
  4047. @node Other Low-Level I/O APIs
  4048. @section Other low-level-I/O-related functions
  4049. @deftp {Data Type} {struct pollfd}
  4050. @standards{POSIX.1,poll.h}
  4051. @end deftp
  4052. @deftp {Data Type} {struct epoll_event}
  4053. @standards{Linux,sys/epoll.h}
  4054. @end deftp
  4055. @deftypefun int poll (struct pollfd *@var{fds}, nfds_t @var{nfds}, int @var{timeout})
  4056. @manpagefunctionstub{poll,2}
  4057. @end deftypefun
  4058. @deftypefun int epoll_create (int @var{size})
  4059. @manpagefunctionstub{epoll_create,2}
  4060. @end deftypefun
  4061. @deftypefun int epoll_wait (int @var{epfd}, struct epoll_event *@var{events}, int @var{maxevents}, int @var{timeout})
  4062. @manpagefunctionstub{epoll_wait,2}
  4063. @end deftypefun